diff --git "a/app.py" "b/app.py"
--- "a/app.py"
+++ "b/app.py"
@@ -7,12 +7,9 @@ from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVR, SVC
-from sklearn.decomposition import PCA #Import at top
-from sklearn.metrics import silhouette_score #Import at top
-from sklearn.cluster import DBSCAN #Import at top
-from sklearn.feature_selection import SelectKBest #Import at top
-import joblib #Import at top
-from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
+from sklearn.feature_selection import SelectKBest
+from sklearn.neural_network import MLPRegressor, MLPClassifier
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.naive_bayes import GaussianNB
+import joblib
+from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
from sklearn.impute import KNNImputer, SimpleImputer
from sklearn.preprocessing import RobustScaler, StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
@@ -20,847 +17,846 @@ from sklearn.pipeline import Pipeline
from ydata_profiling import ProfileReport
from streamlit_pandas_profiling import st_profile_report
from io import StringIO
-import joblib
import requests
import asyncio
from io import BytesIO
import base64
import seaborn as sns
-import time
-from sklearn.cluster import KMeans
import scipy.stats as stats
import mimetypes
import matplotlib.pyplot as plt
from sklearn.model_selection import learning_curve
-# Configurations
-st.set_page_config(page_title="Executive Insights Pro", layout="wide", page_icon="๐")
-
-# ----Load Image----
-@st.cache_data(ttl=3600)
-def load_image(image_url):
- """Loads an image from a URL and returns bytes."""
- try:
- response = requests.get(image_url, stream=True)
- response.raise_for_status()
- return response.content
- except requests.exceptions.RequestException as e:
- st.error(f"Error loading image: {e}")
- return None
-
-# ----Function to make and convert background to base 64 code-----
-def set_background():
- """Sets the background image using base64 encoding."""
- image_url = "https://www.nasa.gov/sites/default/files/thumbnails/image/web_first_images_release.png" # NASA Image
- image_data = load_image(image_url)
- if image_data:
- # Convert bytes to base64
- image_base64 = base64.b64encode(image_data).decode()
- st.markdown(
- f"""
-
- """,
- unsafe_allow_html=True,
- )
- return
-
-# Simplified CSS
-def apply_simplified_theme():
- """Injects simplified CSS to enhance Streamlit's default style."""
- st.markdown(
- """
-
- """,
- unsafe_allow_html=True,
- )
- return
-
-# Apply background and simplified theme
-set_background()
-apply_simplified_theme()
-
-def show_loader(message="Loading..."):
- """Displays an animated loader."""
- st.markdown(
- f"""
-
- """,
- unsafe_allow_html=True
- )
+# Enhanced configuration
+st.set_page_config(
+ page_title="Executive Insights Pro",
+ layout="wide",
+ page_icon="๐",
+ initial_sidebar_state="expanded"
+)
-@st.cache_data(ttl=3600) #Added allow_output_mutation
-def load_data(uploaded_file):
- """Load and cache dataset, with file type validation."""
- if uploaded_file is not None:
- file_extension = uploaded_file.name.split(".")[-1].lower()
- mime_type = mimetypes.guess_type(uploaded_file.name)[0]
-
- max_file_size_mb = 50 # Set a maximum file size (adjust as needed)
- file_size_mb = uploaded_file.size / (1024 * 1024)
- if file_size_mb > max_file_size_mb:
- st.error(f"File size exceeds the limit of {max_file_size_mb} MB.")
- return None
-
-
- try: # Wrap file reading in a try...except
- if file_extension == "csv" or mime_type == 'text/csv':
- df = pd.read_csv(uploaded_file)
- return df
- elif file_extension in ["xlsx", "xls"] or mime_type in ['application/vnd.ms-excel', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet']:
- df = pd.read_excel(uploaded_file)
- return df
- else:
- st.error("Unsupported file type. Please upload a CSV or Excel file.")
- return None
- except FileNotFoundError:
- st.error("File not found. Please check the file path.")
- except pd.errors.ParserError: # Catch pandas-specific parsing errors
- st.error("Error parsing the file. Make sure it's a valid CSV or Excel file.")
- except Exception as e:
- st.error(f"An unexpected error occurred: {type(e).__name__} - {str(e)}")
- return None # Handle other potential exceptions
+# Security: Set allowed file types
+ALLOWED_EXTENSIONS = {'csv', 'xlsx', 'parquet', 'feather'}
+MAX_FILE_SIZE_MB = 250 # 250MB limit
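+
+# validate_file() is called further down but its definition sits outside this hunk;
+# a minimal sketch, assuming it returns an (is_valid, message) tuple and enforces the limits above:
+def validate_file(uploaded_file):
+ """Check the upload against ALLOWED_EXTENSIONS and MAX_FILE_SIZE_MB."""
+ extension = uploaded_file.name.rsplit(".", 1)[-1].lower()
+ if extension not in ALLOWED_EXTENSIONS:
+ return False, f"Unsupported file type: .{extension}"
+ size_mb = uploaded_file.size / (1024 * 1024)
+ if size_mb > MAX_FILE_SIZE_MB:
+ return False, f"File is {size_mb:.1f} MB, above the {MAX_FILE_SIZE_MB} MB limit"
+ return True, "File is valid"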
- else:
- return None
-
-@st.cache_data(ttl=3600)
-def generate_profile(df):
- """Generate automated EDA report"""
- return ProfileReport(df, minimal=True)
-
-# Session State Management
-if 'raw_data' not in st.session_state:
- st.session_state.raw_data = None
-if 'cleaned_data' not in st.session_state:
- st.session_state.cleaned_data = None
-if 'train_test' not in st.session_state:
- st.session_state.train_test = {}
-if 'model' not in st.session_state:
- st.session_state.model = None
-if 'preprocessor' not in st.session_state:
- st.session_state.preprocessor = None # to store the column transformer
-
-# Sidebar Navigation
-st.sidebar.title("๐ฎ Data Wizard Pro")
-
-# Apply custom CSS to change text color in the sidebar
-st.markdown(
- """
-
- """,
- unsafe_allow_html=True,
-)
-# Replace the existing app_mode section with this:
-app_mode = st.sidebar.radio("Navigate", [
- "Data Upload",
- "Smart Cleaning",
- "Advanced EDA",
- "Model Training",
- "Predictions",
- "Visualization Lab",
- "Neural Network Studio" # New option
-])
-
-# --- Main App Logic ---
-if app_mode == "Data Upload":
- st.title("๐ค Data Upload & Initial Analysis")
-
- # File Upload Section with improved styling
- st.markdown(
- """
-
- """,
- unsafe_allow_html=True,
- )
+ # Numeric specific checks
+ if pd.api.types.is_numeric_dtype(df[col]):
+ col_report.update({
+ 'mean': df[col].mean(),
+ 'std': df[col].std(),
+ 'zeros': (df[col] == 0).sum(),
+ 'negatives': (df[col] < 0).sum() if df[col].dtype != 'uint' else 0,
+ 'outliers': detect_outliers(df[col])
+ })
+ report['data_health_score'] -= 2 # Deduct 2% per numeric column
+
+ # Categorical specific checks
+ if pd.api.types.is_string_dtype(df[col]):
+ col_report.update({
+ 'top_value': df[col].mode().iloc[0] if not df[col].mode().empty else None,
+ 'top_freq': df[col].value_counts().iloc[0] / len(df) if df[col].count() > 0 else 0
+ })
+ report['data_health_score'] -= 1 # Deduct 1% per string column
+
+ report['column_analysis'][col] = col_report
+ report['data_health_score'] = max(report['data_health_score'], 0)
+
+ return report
+
+def detect_outliers(series):
+ """Detect outliers using IQR method"""
+ q1 = series.quantile(0.25)
+ q3 = series.quantile(0.75)
+ iqr = q3 - q1
+ return ((series < (q1 - 1.5 * iqr)) | (series > (q3 + 1.5 * iqr))).sum()
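+
+# Worked example (illustrative): detect_outliers(pd.Series([1, 2, 3, 100])) -> 1
+# q1 = 1.75, q3 = 27.25, iqr = 25.5, so only 100 falls outside [q1 - 1.5*iqr, q3 + 1.5*iqr].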
+
+# --- Data Upload Page ---
+if app_mode == "Data Upload":
+ st.title("๐ค Smart Data Hub")
+ st.markdown("""
+ **Upload your dataset** (CSV, Excel, Parquet) for comprehensive analysis.
+ Get instant data health insights and quality assessment.
+ """)
+
+ # File upload with enhanced UI
uploaded_file = st.file_uploader(
- "Choose a CSV or Excel file", type=["csv", "xlsx"],
- help="Upload your dataset here. Supported formats: CSV, XLSX"
+ "Drag & drop or browse files",
+ type=list(ALLOWED_EXTENSIONS),
+ help=f"Max file size: {MAX_FILE_SIZE_MB}MB. Supported formats: {', '.join(ALLOWED_EXTENSIONS)}"
)
-
+
if uploaded_file:
- df = load_data(uploaded_file)
- if df is not None:
- # only proceed if load_data returned a valid dataframe
- st.session_state.raw_data = df
- st.session_state.cleaned_data = df.copy()
-
- st.subheader("Data Overview")
- # Data Overview Cards with more context
- col1, col2, col3 = st.columns(3)
- with col1:
- st.metric("Number of Rows", df.shape[0], help="Total number of entries in the dataset.")
- with col2:
- st.metric("Number of Columns", df.shape[1], help="Total number of features in the dataset.")
- with col3:
- num_missing = df.isna().sum().sum()
- st.metric("Total Missing Values", num_missing, help="Total number of missing entries across the entire dataset.")
-
- # Display Data Types
- st.write("Column Data Types:")
- dtype_counts = df.dtypes.value_counts().to_dict()
- for dtype, count in dtype_counts.items():
- st.write(f"- {dtype}: {count} column(s)")
+ # Validate file
+ is_valid, message = validate_file(uploaded_file)
+ if not is_valid:
+ st.error(f"Upload error: {message}")
+ st.stop()
+
+ # Load data with progress
+ with st.spinner(f"Loading {uploaded_file.name}..."):
+ try:
+ if uploaded_file.name.endswith('.csv'):
+ df = pd.read_csv(uploaded_file, low_memory=False)
+ elif uploaded_file.name.endswith(('.xlsx', '.xls')):
+ df = pd.read_excel(uploaded_file)
+ elif uploaded_file.name.endswith('.parquet'):
+ df = pd.read_parquet(uploaded_file)
+ elif uploaded_file.name.endswith('.feather'):
+ df = pd.read_feather(uploaded_file)
+
+ st.session_state.raw_data = df
+ st.success("Dataset loaded successfully!")
+
+ except Exception as e:
+ st.error(f"Error loading file: {str(e)}")
+ st.stop()
+
+ # Data Health Dashboard
+ st.subheader("๐ Data Health Dashboard")
+ report = enhanced_quality_report(df)
+
+ col1, col2, col3, col4 = st.columns(4)
+ col1.metric("Total Rows", report['basic_stats']['rows'])
+ col2.metric("Total Columns", report['basic_stats']['columns'])
+ col3.metric("Missing Values", report['basic_stats']['missing_values'])
+ col4.metric("Data Health Score", f"{report['data_health_score']}/100")
+
+ # Column Explorer
+ with st.expander("๐ Deep Column Analysis", expanded=True):
+ selected_col = st.selectbox("Select column to inspect", df.columns)
+ col_info = report['column_analysis'][selected_col]
- # Sample Data Table with improved display
- st.subheader("Sample Data")
- num_rows_preview = st.slider("Number of Rows to Preview", 5, 20, 10, help="Adjust the number of rows displayed in the sample data.")
- st.dataframe(df.head(num_rows_preview), use_container_width=True)
+ st.write(f"**Type:** {col_info['type']}")
+ st.write(f"**Unique Values:** {col_info['unique']}")
+ st.write(f"**Missing Values:** {col_info['missing']} ({col_info['missing']/len(df):.1%})")
- # Column Statistics
- with st.expander("๐ Column Statistics"):
- for col in df.columns:
- st.subheader(f"Column: {col}")
- st.write(f"Data type: {df[col].dtype}")
- if pd.api.types.is_numeric_dtype(df[col]):
- st.write("Summary Statistics:")
- st.write(df[col].describe())
- else:
- st.write("Value Counts:")
- st.write(df[col].value_counts())
+ if pd.api.types.is_numeric_dtype(df[selected_col]):
+ st.write("**Distribution:**")
+ st.line_chart(df[selected_col])
+ st.write(f"**Outliers Detected:** {col_info['outliers']}")
+ else:
+ st.write("**Most Common Values:**")
+ top_values = df[selected_col].value_counts().head(5)
+ st.bar_chart(top_values)
+
+ # Smart Recommendations
+ with st.expander("๐ก Cleaning Recommendations"):
+ recommendations = []
+ if report['basic_stats']['duplicates'] > 0:
+ recommendations.append(f"๐จ Remove {report['basic_stats']['duplicates']} duplicate rows")
+ if report['basic_stats']['missing_values'] > 0:
+ recommendations.append("๐ง Apply advanced imputation strategies")
+ for col, data in report['column_analysis'].items():
+ if data['missing'] > 0.5 * len(df):
+ recommendations.append(f"โ ๏ธ Consider dropping {col} (>{50}% missing)")
+ if data['unique'] == len(df):
+ recommendations.append(f"๐ Investigate {col} - potential unique identifier")
- # Automated EDA Report
- with st.expander("๐ Automated Data Report"):
- if st.button("Generate Smart Report"):
- show_loader("Generating EDA Report")
- pr = generate_profile(df)
- st_profile_report(pr)
+ if recommendations:
+ st.write("### Recommended Actions")
+ for rec in recommendations[:5]: # Show top 5
+ st.write(f"- {rec}")
+ else:
+ st.success("No critical issues detected - your data looks healthy!")
+ # Data Preview
+ with st.expander("๐ Data Preview", expanded=True):
+ preview_size = st.slider("Preview rows", 5, 100, 15)
+ st.dataframe(df.head(preview_size).style.highlight_null(color='#FF6666'))
+
+ # Advanced Profiling
+ if st.button("๐ Generate Full Data Profile"):
+ with st.spinner("Generating comprehensive report..."):
+ pr = ProfileReport(df, explorative=True)
+ st_profile_report(pr)
+
+# Smart Cleaning Section
elif app_mode == "Smart Cleaning":
st.title("๐งผ Intelligent Data Cleaning")
- if st.session_state.raw_data is not None:
- df = st.session_state.cleaned_data
-
- # Cleaning Toolkit
- col1, col2 = st.columns([1, 3])
- with col1:
- st.subheader("Cleaning Actions")
-
- clean_action = st.selectbox("Choose Operation", [
- "Handle Missing Values",
- "Clean Text",
- "Remove Columns", # New option
- # ... other cleaning operations ...
+ st.markdown("""
+ **Automated Data Cleaning** with smart suggestions and advanced transformations.
+ Clean your data with confidence using AI-powered recommendations.
+ """)
+
+ if 'raw_data' not in st.session_state or st.session_state.raw_data is None:
+ st.warning("Please upload your data in the Data Upload section first.")
+ st.stop()
+
+ df = st.session_state.raw_data.copy()
+ cleaning_actions = []
+
+ # Data Health Summary
+ st.subheader("๐ Data Health Summary")
+ col1, col2, col3 = st.columns(3)
+ with col1:
+ missing_pct = df.isna().mean().mean()
+ st.metric("Missing Values", f"{missing_pct:.1%}")
+ with col2:
+ duplicates = df.duplicated().sum()
+ st.metric("Duplicates", duplicates)
+ with col3:
+ data_types = df.dtypes.value_counts().to_dict()
+ st.metric("Data Types", str(data_types))
+
+ # Cleaning Operations
+ st.subheader("๐ง Cleaning Operations")
+
+ # 1. Missing Value Handling
+ with st.expander("๐ณ๏ธ Handle Missing Values", expanded=True):
+ missing_cols = df.columns[df.isna().any()].tolist()
+ if missing_cols:
+ st.write("Columns with missing values:")
+ cols = st.multiselect("Select columns to clean", missing_cols, default=missing_cols)
+
+ method = st.radio("Imputation Method", [
+ "Drop Missing",
+ "Mean/Median/Mode",
+ "KNN Imputation",
+ "Advanced Imputation"
+ ], horizontal=True)
+
+ if method == "Mean/Median/Mode":
+ strategy = st.selectbox("Strategy", ["mean", "median", "most_frequent"])
+ if st.button("Apply Imputation"):
+ # pandas has no 'most_frequent' aggregation, so fall back to the per-column mode
+ fill_values = df[cols].mode().iloc[0] if strategy == "most_frequent" else df[cols].agg(strategy)
+ df[cols] = df[cols].fillna(fill_values)
+ cleaning_actions.append(f"Filled missing values in {cols} using {strategy}")
+
+ elif method == "KNN Imputation":
+ n_neighbors = st.slider("Number of neighbors", 2, 15, 5)
+ if st.button("Apply KNN Imputation"):
+ from sklearn.impute import KNNImputer
+ imputer = KNNImputer(n_neighbors=n_neighbors)
+ df[cols] = imputer.fit_transform(df[cols])
+ cleaning_actions.append(f"Applied KNN imputation (k={n_neighbors}) on {cols}")
+
+ elif method == "Advanced Imputation":
+ st.write("Coming soon: MICE, Deep Learning imputation")
+ else:
+ st.success("No missing values found!")
+
+ # 2. Duplicate Handling
+ with st.expander("๐ Handle Duplicates", expanded=True):
+ if duplicates > 0:
+ st.write(f"Found {duplicates} duplicate rows")
+ dup_strategy = st.radio("Duplicate Strategy", [
+ "Remove All Duplicates",
+ "Keep First Occurrence",
+ "Keep Last Occurrence"
])
+
+ if st.button("Handle Duplicates"):
+ df = df.drop_duplicates(keep={
+ "Remove All Duplicates": False,
+ "Keep First Occurrence": 'first',
+ "Keep Last Occurrence": 'last'
+ }[dup_strategy])
+ cleaning_actions.append(f"Removed duplicates using strategy: {dup_strategy}")
+ else:
+ st.success("No duplicates found!")
+
+ # 3. Data Type Conversion
+ with st.expander("๐ Convert Data Types", expanded=True):
+ st.write("Current Data Types:")
+ st.dataframe(df.dtypes.reset_index().rename(columns={
+ 0: 'Type',
+ 'index': 'Column'
+ }))
+
+ col_to_convert = st.selectbox("Select column to convert", df.columns)
+ new_type = st.selectbox("New Data Type", [
+ "String", "Integer", "Float",
+ "Boolean", "Datetime", "Category"
+ ])
+
+ if st.button("Convert Data Type"):
+ try:
+ if new_type == "String":
+ df[col_to_convert] = df[col_to_convert].astype(str)
+ elif new_type == "Integer":
+ df[col_to_convert] = pd.to_numeric(df[col_to_convert], errors='coerce').astype('Int64')
+ elif new_type == "Float":
+ df[col_to_convert] = pd.to_numeric(df[col_to_convert], errors='coerce')
+ elif new_type == "Boolean":
+ df[col_to_convert] = df[col_to_convert].astype(bool)
+ elif new_type == "Datetime":
+ df[col_to_convert] = pd.to_datetime(df[col_to_convert], errors='coerce')
+ elif new_type == "Category":
+ df[col_to_convert] = df[col_to_convert].astype('category')
+
+ cleaning_actions.append(f"Converted {col_to_convert} to {new_type}")
+ st.success("Data type converted successfully!")
+ except Exception as e:
+ st.error(f"Conversion failed: {str(e)}")
+
+ # 4. Outlier Detection & Handling
+ with st.expander("๐ Handle Outliers", expanded=True):
+ numeric_cols = df.select_dtypes(include=np.number).columns.tolist()
+ if numeric_cols:
+ outlier_col = st.selectbox("Select numeric column", numeric_cols)
+ threshold = st.slider("Outlier Threshold (Z-Score)", 1.0, 5.0, 3.0)
+
+ z_scores = (df[outlier_col] - df[outlier_col].mean()) / df[outlier_col].std()
+ outliers = df[abs(z_scores) > threshold]
+
+ st.write(f"Detected {len(outliers)} outliers")
+ st.dataframe(outliers)
+
+ if st.button("Handle Outliers"):
+ df = df[abs(z_scores) <= threshold]
+ cleaning_actions.append(f"Removed {len(outliers)} outliers from {outlier_col}")
+ else:
+ st.info("No numeric columns found for outlier detection")
+
+ # 5. Text Cleaning
+ with st.expander("๐ Clean Text Data", expanded=True):
+ text_cols = df.select_dtypes(include='object').columns.tolist()
+ if text_cols:
+ text_col = st.selectbox("Select text column", text_cols)
+ options = st.multiselect("Text Cleaning Options", [
+ "Lowercase",
+ "Remove Punctuation",
+ "Remove Extra Spaces",
+ "Remove Stopwords",
+ "Stemming"
+ ])
+
+ if st.button("Clean Text"):
+ if "Lowercase" in options:
+ df[text_col] = df[text_col].str.lower()
+ if "Remove Punctuation" in options:
+ df[text_col] = df[text_col].str.replace(r'[^\w\s]', '', regex=True)
+ if "Remove Extra Spaces" in options:
+ df[text_col] = df[text_col].str.strip().str.replace(r'\s+', ' ', regex=True)
+ if "Remove Stopwords" in options:
+ from nltk.corpus import stopwords
+ stop_words = set(stopwords.words('english'))
+ df[text_col] = df[text_col].apply(
+ lambda x: ' '.join([word for word in x.split() if word not in stop_words])
+ )
+ if "Stemming" in options:
+ from nltk.stem import PorterStemmer
+ stemmer = PorterStemmer()
+ df[text_col] = df[text_col].apply(
+ lambda x: ' '.join([stemmer.stem(word) for word in x.split()])
+ )
+
+ cleaning_actions.append(f"Cleaned text in {text_col}")
+ st.success("Text cleaned successfully!")
+ else:
+ st.info("No text columns found for cleaning")
- if clean_action == "Handle Missing Values":
- columns_with_missing = df.columns[df.isnull().any()].tolist()
- column_to_impute = st.selectbox("Column to Impute", ["All Columns"] + columns_with_missing)
-
- method = st.selectbox("Imputation Method", [
- "KNN Imputation",
- "Median Fill",
- "Mean Fill",
- "Drop Missing",
- "Constant Value Fill"
- ])
- if method == "KNN Imputation":
- knn_neighbors = st.slider("KNN Neighbors", 2, 10, 5)
- elif method == "Constant Value Fill":
- constant_value = st.text_input("Constant Value")
-
- elif clean_action == "Clean Text":
- text_column = st.selectbox("Text Column", df.select_dtypes(include='object').columns)
- cleaning_operation = st.selectbox("Cleaning Operation", ["Remove Special Characters", "Lowercase", "Uppercase", "Remove Extra Spaces"])
- if cleaning_operation == "Remove Special Characters":
- chars_to_remove = st.text_input("Characters to Remove", r'[^a-zA-Z0-9\s]')
-
- elif clean_action == "Remove Columns":
- remove_cols = st.multiselect("Columns to Remove", df.columns) # Multiselect for column removal
-
- with col2:
- st.subheader("Data Preview") # Added Data Preview Section
- st.dataframe(df.head(10), use_container_width=True) # Display sample data
-
- if st.button("Apply Transformation"):
- with st.spinner("Applying changes..."):
- current_df = df.copy()
- # ... (your data history logic) ...
-
- if clean_action == "Handle Missing Values":
- if method == "KNN Imputation":
- imputer = KNNImputer(n_neighbors=knn_neighbors)
- if column_to_impute == "All Columns":
- current_df = pd.DataFrame(imputer.fit_transform(current_df), columns=current_df.columns)
- else:
- current_df[[column_to_impute]] = pd.DataFrame(imputer.fit_transform(current_df[[column_to_impute]]), columns=[column_to_impute])
- elif method == "Median Fill":
- if column_to_impute == "All Columns":
- current_df = current_df.fillna(current_df.median())
- else:
- current_df[column_to_impute] = current_df[column_to_impute].fillna(current_df[column_to_impute].median())
- elif method == "Mean Fill":
- if column_to_impute == "All Columns":
- current_df = current_df.fillna(current_df.mean())
- else:
- current_df[column_to_impute] = current_df[column_to_impute].fillna(current_df[column_to_impute].mean())
- elif method == "Constant Value Fill":
- if column_to_impute == "All Columns":
- current_df = current_df.fillna(constant_value)
- else:
- current_df[column_to_impute] = current_df[column_to_impute].fillna(constant_value)
- else:
- current_df = current_df.dropna()
+ # Save Cleaned Data
+ if st.button("๐พ Save Cleaned Data"):
+ st.session_state.cleaned_data = df
+ st.success("Cleaned data saved successfully!")
+
+ # Show Cleaning Log
+ st.subheader("๐ Cleaning Log")
+ if cleaning_actions:
+ st.write("### Applied Transformations")
+ for action in cleaning_actions:
+ st.write(f"- {action}")
+ else:
+ st.info("No transformations applied yet")
+
+# Advanced EDA Section
+elif app_mode == "Advanced EDA":
+ st.title("๐ Advanced Exploratory Data Analysis")
+ st.markdown("""
+ **Interactive Data Exploration** with advanced statistical tools and visualizations.
+ Uncover hidden patterns and relationships in your data.
+ """)
- elif clean_action == "Clean Text":
- import re # moved here since its only used here to avoid library bloat
+ if 'cleaned_data' not in st.session_state or st.session_state.cleaned_data is None:
+ st.warning("Please clean your data in the Smart Cleaning section first.")
+ st.stop()
- def clean_text(text, operation, chars_to_remove=r'[^a-zA-Z0-9\s]'):
- if operation == "Remove Special Characters":
- text = re.sub(chars_to_remove, '', str(text))
- elif operation == "Lowercase":
- text = str(text).lower()
- elif operation == "Uppercase":
- text = str(text).upper()
- elif operation == "Remove Extra Spaces":
- text = " ".join(str(text).split())
- return text
+ df = st.session_state.cleaned_data.copy()
+
+ # Initialize session state for EDA configuration
+ if 'eda_config' not in st.session_state:
+ st.session_state.eda_config = {
+ 'plot_type': "Histogram",
+ 'x_col': df.columns[0] if len(df.columns) > 0 else None,
+ 'y_col': df.columns[1] if len(df.columns) > 1 else None,
+ 'z_col': df.columns[2] if len(df.columns) > 2 else None,
+ 'color_col': None,
+ 'size_col': None,
+ 'time_col': None,
+ 'value_col': None,
+ 'scatter_matrix_cols': df.select_dtypes(include=np.number).columns.tolist()[:5],
+ 'color_palette': "Viridis",
+ 'hover_data_cols': [],
+ 'filter_col': None,
+ 'filter_options': []
+ }
- current_df[text_column] = current_df[text_column].astype(str).apply(lambda x: clean_text(x, cleaning_operation, chars_to_remove))
+ # Data Filtering Section
+ with st.expander("๐ Data Filtering", expanded=True):
+ st.session_state.eda_config['filter_col'] = st.selectbox(
+ "Filter Column",
+ [None] + list(df.columns),
+ help="Choose a column to filter the data."
+ )
- elif clean_action == "Remove Columns":
- if remove_cols: #Check that it is not empty
- current_df = current_df.drop(columns=remove_cols) # Drop selected columns
+ if st.session_state.eda_config['filter_col']:
+ unique_values = df[st.session_state.eda_config['filter_col']].unique()
+ st.session_state.eda_config['filter_options'] = st.multiselect(
+ "Filter Values",
+ unique_values,
+ default=unique_values,
+ help=f"Select values from '{st.session_state.eda_config['filter_col']}'"
+ )
+ df = df[df[st.session_state.eda_config['filter_col']].isin(
+ st.session_state.eda_config['filter_options']
+ )]
+
+ # Visualization Type Selection
+ st.sidebar.header("๐ Visualization Configuration")
+ plot_types = [
+ "Histogram", "Scatter Plot", "Box Plot", "Violin Plot",
+ "Correlation Heatmap", "Parallel Coordinates", "Andrews Curves",
+ "Pair Plot", "Density Contour", "3D Scatter", "Time Series",
+ "Sunburst Chart", "Funnel Chart", "Clustering Analysis"
+ ]
+ st.session_state.eda_config['plot_type'] = st.sidebar.selectbox(
+ "Choose Visualization",
+ plot_types,
+ index=0
+ )
- st.session_state.cleaned_data = current_df
- st.success("Transformation applied!")
+ # Dynamic Controls Based on Plot Type
+ if st.session_state.eda_config['plot_type'] != "Correlation Heatmap":
+ st.session_state.eda_config['x_col'] = st.sidebar.selectbox(
+ "X Axis",
+ df.columns,
+ index=df.columns.get_loc(st.session_state.eda_config['x_col'])
+ if st.session_state.eda_config['x_col'] in df.columns else 0
+ )
- if st.button("Refresh Data Preview"): # Button to refresh data preview
- st.rerun()
-
-elif app_mode == "Advanced EDA":
- st.title("๐ Advanced Exploratory Analysis")
-
- if st.session_state.cleaned_data is not None:
- df = st.session_state.cleaned_data.copy()
-
- # Initialize session state for plot configuration
- if 'plot_config' not in st.session_state:
- st.session_state.plot_config = {
- 'plot_type': "Histogram",
- 'x_col': df.columns[0] if len(df.columns) > 0 else None,
- 'y_col': df.columns[1] if len(df.columns) > 1 else None,
- 'z_col': df.columns[2] if len(df.columns) > 2 else None,
- 'color_col': None,
- 'size_col': None,
- 'time_col': None,
- 'value_col': None,
- 'scatter_matrix_cols': df.select_dtypes(include=np.number).columns.tolist()[:5],
- 'color_palette': "#00f7ff",
- 'color_continuous_scale': "Viridis",
- 'hover_data_cols': [],
- 'filter_col': None,
- 'filter_options': []
- }
+ if st.session_state.eda_config['plot_type'] in ["Scatter Plot", "Box Plot", "Violin Plot", "Time Series", "3D Scatter", "Histogram"]:
+ st.session_state.eda_config['y_col'] = st.sidebar.selectbox(
+ "Y Axis",
+ df.columns,
+ index=df.columns.get_loc(st.session_state.eda_config['y_col'])
+ if st.session_state.eda_config['y_col'] in df.columns else 0
+ )
- # Data Filtering Section
- with st.expander("๐ Data Filtering", expanded=False):
- # Use direct session state assignment for reactivity
- st.session_state.plot_config['filter_col'] = st.selectbox(
- "Filter Column",
- [None] + list(df.columns),
- help="Choose a column to filter the data."
- )
+ if st.session_state.eda_config['plot_type'] == "3D Scatter":
+ st.session_state.eda_config['z_col'] = st.sidebar.selectbox(
+ "Z Axis",
+ df.columns,
+ index=df.columns.get_loc(st.session_state.eda_config['z_col'])
+ if st.session_state.eda_config['z_col'] in df.columns else 0
+ )
+ st.session_state.eda_config['color_col'] = st.sidebar.selectbox(
+ "Color by",
+ [None] + list(df.columns)
+ )
- if st.session_state.plot_config['filter_col']:
- unique_values = df[st.session_state.plot_config['filter_col']].unique()
- st.session_state.plot_config['filter_options'] = st.multiselect(
- "Filter Values",
- unique_values,
- default=unique_values,
- help=f"Select values from '{st.session_state.plot_config['filter_col']}'"
- )
- df = df[df[st.session_state.plot_config['filter_col']].isin(
- st.session_state.plot_config['filter_options']
- )]
-
- # Visualization Configuration
- st.sidebar.header("๐ Plot Configuration")
-
- # Plot type selector
- st.session_state.plot_config['plot_type'] = st.sidebar.selectbox(
- "Choose Visualization",
- [
- "Histogram", "Scatter Plot", "Box Plot",
- "Correlation Heatmap", "3D Scatter",
- "Violin Plot", "Time Series", "Scatter Matrix"
- ],
- index=0 # Reset to first option when plot type changes
+ # Advanced Plot Customization
+ with st.expander("๐จ Advanced Customization", expanded=False):
+ st.session_state.eda_config['color_palette'] = st.selectbox(
+ "Color Palette",
+ ["Viridis", "Plasma", "Magma", "Cividis", "RdBu", "Rainbow"]
+ )
+ st.session_state.eda_config['hover_data_cols'] = st.multiselect(
+ "Hover Data",
+ df.columns
)
- # Dynamic controls based on plot type
- if st.session_state.plot_config['plot_type'] != "Correlation Heatmap":
- st.session_state.plot_config['x_col'] = st.sidebar.selectbox(
- "X Axis",
- df.columns,
- index=df.columns.get_loc(st.session_state.plot_config['x_col'])
- if st.session_state.plot_config['x_col'] in df.columns else 0
+ # Plot Generation
+ try:
+ fig = None
+ config = st.session_state.eda_config
+
+ if config['plot_type'] == "Histogram":
+ fig = px.histogram(
+ df, x=config['x_col'], y=config['y_col'],
+ nbins=30, template="plotly_dark",
+ # palette names like "Viridis" are colorscales, not single colors
+ color_discrete_sequence=getattr(px.colors.sequential, config['color_palette'], None)
+ )
- if st.session_state.plot_config['plot_type'] in ["Scatter Plot", "Box Plot",
- "Violin Plot", "Time Series",
- "3D Scatter", "Histogram"]:
- st.session_state.plot_config['y_col'] = st.sidebar.selectbox(
- "Y Axis",
- df.columns,
- index=df.columns.get_loc(st.session_state.plot_config['y_col'])
- if st.session_state.plot_config['y_col'] in df.columns else 0
+ elif config['plot_type'] == "Scatter Plot":
+ fig = px.scatter(
+ df, x=config['x_col'], y=config['y_col'],
+ color=config['color_col'],
+ size=config['size_col'],
+ hover_data=config['hover_data_cols']
)
- if st.session_state.plot_config['plot_type'] == "3D Scatter":
- st.session_state.plot_config['z_col'] = st.sidebar.selectbox(
- "Z Axis",
- df.columns,
- index=df.columns.get_loc(st.session_state.plot_config['z_col'])
- if st.session_state.plot_config['z_col'] in df.columns else 0
- )
- st.session_state.plot_config['color_col'] = st.sidebar.selectbox(
- "Color by",
- [None] + list(df.columns)
+ elif config['plot_type'] == "3D Scatter":
+ fig = px.scatter_3d(
+ df, x=config['x_col'], y=config['y_col'], z=config['z_col'],
+ color=config['color_col'],
+ color_discrete_sequence=getattr(px.colors.sequential, config['color_palette'], None)
+ )
- # Color configuration
- if st.session_state.plot_config['plot_type'] == "Correlation Heatmap":
- st.session_state.plot_config['color_continuous_scale'] = st.sidebar.selectbox(
- "Color Scale",
- ['Viridis', 'Plasma', 'Magma', 'Cividis', 'RdBu']
- )
- else:
- st.session_state.plot_config['color_palette'] = st.sidebar.selectbox(
- "Color Palette",
- ['#00f7ff', '#ff00ff', '#f70000', '#0000f7']
- )
+ elif config['plot_type'] == "Correlation Heatmap":
+ numeric_df = df.select_dtypes(include=np.number)
+ if not numeric_df.empty:
+ corr = numeric_df.corr()
+ fig = px.imshow(
+ corr, text_auto=True,
+ color_continuous_scale=config['color_palette']
+ )
+ else:
+ st.warning("No numerical columns found for correlation heatmap.")
- # Additional configurations
- if st.session_state.plot_config['plot_type'] == "Scatter Plot":
- st.session_state.plot_config['size_col'] = st.sidebar.selectbox(
- "Size by",
- [None] + list(df.columns)
- )
- st.session_state.plot_config['hover_data_cols'] = st.sidebar.multiselect(
- "Hover Data",
- df.columns
+ elif config['plot_type'] == "Box Plot":
+ fig = px.box(
+ df, x=config['x_col'], y=config['y_col'],
+ color=config['color_col']
)
- if st.session_state.plot_config['plot_type'] == "Time Series":
- st.session_state.plot_config['time_col'] = st.sidebar.selectbox(
- "Time Column",
- df.columns
- )
- st.session_state.plot_config['value_col'] = st.sidebar.selectbox(
- "Value Column",
- df.columns
+ elif config['plot_type'] == "Violin Plot":
+ fig = px.violin(
+ df, x=config['x_col'], y=config['y_col'],
+ box=True, points="all",
+ color=config['color_col']
)
- if st.session_state.plot_config['plot_type'] == "Scatter Matrix":
- st.session_state.plot_config['scatter_matrix_cols'] = st.multiselect(
- "Columns for Scatter Matrix",
- df.select_dtypes(include=np.number).columns,
- default=st.session_state.plot_config['scatter_matrix_cols']
+ elif config['plot_type'] == "Time Series":
+ # no dedicated time/value selectors exist in this view, so fall back to the X/Y choices
+ time_col = config['time_col'] or config['x_col']
+ value_col = config['value_col'] or config['y_col']
+ df = df.sort_values(by=time_col)
+ fig = px.line(
+ df, x=time_col, y=value_col,
+ color=config['color_col']
+ )
- # Plot generation
- try:
- fig = None
- config = st.session_state.plot_config
-
- if config['plot_type'] == "Histogram":
- fig = px.histogram(
- df, x=config['x_col'], y=config['y_col'],
- nbins=30, template="plotly_dark",
- color_discrete_sequence=[config['color_palette']]
- )
-
- elif config['plot_type'] == "Scatter Plot":
- fig = px.scatter(
- df, x=config['x_col'], y=config['y_col'],
- color_discrete_sequence=[config['color_palette']],
- size=config['size_col'],
- hover_data=config['hover_data_cols']
- )
-
- elif config['plot_type'] == "3D Scatter":
- fig = px.scatter_3d(
- df, x=config['x_col'], y=config['y_col'], z=config['z_col'],
- color=config['color_col'],
- color_discrete_sequence=[config['color_palette']]
- )
-
- elif config['plot_type'] == "Correlation Heatmap":
- numeric_df = df.select_dtypes(include=np.number)
- if not numeric_df.empty:
- corr = numeric_df.corr()
- fig = px.imshow(
- corr, text_auto=True,
- color_continuous_scale=config['color_continuous_scale']
- )
- else:
- st.warning("No numerical columns found for correlation heatmap.")
-
- elif config['plot_type'] == "Box Plot":
- fig = px.box(
- df, x=config['x_col'], y=config['y_col'],
- color_discrete_sequence=[config['color_palette']]
- )
-
- elif config['plot_type'] == "Violin Plot":
- fig = px.violin(
- df, x=config['x_col'], y=config['y_col'],
- box=True, points="all",
- color_discrete_sequence=[config['color_palette']]
- )
-
- elif config['plot_type'] == "Time Series":
- df = df.sort_values(by=config['time_col'])
- fig = px.line(
- df, x=config['time_col'], y=config['value_col'],
- color_discrete_sequence=[config['color_palette']]
- )
+ elif config['plot_type'] == "Scatter Matrix":
+ fig = px.scatter_matrix(
+ df, dimensions=config['scatter_matrix_cols'],
+ color=config['color_col']
+ )
- elif config['plot_type'] == "Scatter Matrix":
- fig = px.scatter_matrix(
- df, dimensions=config['scatter_matrix_cols'],
- color_discrete_sequence=[config['color_palette']]
- )
+ if fig:
+ st.plotly_chart(fig, use_container_width=True)
+ except Exception as e:
+ st.error(f"An error occurred while generating the plot: {e}")
+
+ # Statistical Analysis Section
+ with st.expander("๐ Statistical Analysis", expanded=True):
+ analysis_type = st.selectbox("Select Analysis Type", [
+ "Descriptive Statistics",
+ "Correlation Analysis",
+ "Hypothesis Testing",
+ "Distribution Fitting"
+ ])
- if fig:
- st.plotly_chart(fig, use_container_width=True)
- except Exception as e:
- st.error(f"An error occurred while generating the plot: {e}")
+ if analysis_type == "Descriptive Statistics":
+ st.write(df.describe(include='all'))
- with st.expander("๐งช Hypothesis Testing"):
- test_type = st.selectbox("Select Test Type", ["T-test", "Chi-Squared Test"])
+ elif analysis_type == "Correlation Analysis":
+ numeric_cols = df.select_dtypes(include=np.number).columns
+ if len(numeric_cols) >= 2:
+ corr_method = st.selectbox("Correlation Method", [
+ "Pearson", "Kendall", "Spearman"
+ ])
+ corr_matrix = df[numeric_cols].corr(method=corr_method.lower())
+ st.write(corr_matrix)
+ fig_corr = px.imshow(corr_matrix, text_auto=True, color_continuous_scale=st.session_state.eda_config['color_palette'])
+ st.plotly_chart(fig_corr, use_container_width=True)
+ else:
+ st.warning("Need at least 2 numeric columns for correlation analysis")
+ elif analysis_type == "Hypothesis Testing":
+ test_type = st.selectbox("Select Test Type", [
+ "T-test", "Chi-Squared Test", "ANOVA", "Mann-Whitney U"
+ ])
if test_type == "T-test":
col1 = st.selectbox("Column 1 (Numeric)", df.select_dtypes(include=np.number).columns)
col2 = st.selectbox("Column 2 (Categorical)", df.select_dtypes(include='object').columns)
if st.button("Run T-test"):
- # Example: Split data by category and perform t-test
- try:
- groups = df.groupby(col2)[col1].apply(list)
- if len(groups) == 2:
- t_stat, p_value = stats.ttest_ind(groups.iloc[0], groups.iloc[1])
- st.write(f"T-statistic: {t_stat:.4f}")
- st.write(f"P-value: {p_value:.4f}")
- if p_value < 0.05:
- st.write("Reject the null hypothesis.")
- else:
- st.write("Fail to reject the null hypothesis.")
+ groups = df.groupby(col2)[col1].apply(list)
+ if len(groups) == 2:
+ t_stat, p_value = stats.ttest_ind(groups.iloc[0], groups.iloc[1])
+ st.write(f"T-statistic: {t_stat:.4f}")
+ st.write(f"P-value: {p_value:.4f}")
+ if p_value < 0.05:
+ st.write("Reject the null hypothesis.")
else:
- st.write("Select a categorical column with exactly two categories.")
- except Exception as e:
- st.error(f"An error occurred during the T-test: {e}")
-
+ st.write("Fail to reject the null hypothesis.")
+ else:
+ st.write("Select a categorical column with exactly two categories.")
+
+ elif analysis_type == "Distribution Fitting":
+ numeric_col = st.selectbox("Select Numeric Column", df.select_dtypes(include=np.number).columns)
+ dist_types = ["Normal", "Log-Normal", "Exponential", "Gamma"]
+ selected_dist = st.selectbox("Select Distribution Type", dist_types)
+ if st.button("Fit Distribution"):
+ from scipy.stats import norm, lognorm, expon, gamma
+ dist_functions = {
+ "Normal": norm,
+ "Log-Normal": lognorm,
+ "Exponential": expon,
+ "Gamma": gamma
+ }
+ params = dist_functions[selected_dist].fit(df[numeric_col].dropna())
+ st.write(f"Fitted Parameters: {params}")
+
+ # Data Profiling Section
+ with st.expander("๐ Generate Full Data Profile", expanded=False):
+ if st.button("๐ Generate Comprehensive Report"):
+ with st.spinner("Generating report..."):
+ pr = ProfileReport(df, explorative=True)
+ st_profile_report(pr)
+
+# Model Training Section
elif app_mode == "Model Training":
- st.title("๐ Model Training")
-
- if st.session_state.cleaned_data is not None:
- df = st.session_state.cleaned_data.copy()
-
- # Initialize session state for train/test split
- if 'X_train_selected' not in st.session_state:
- st.session_state.X_train_selected = None
- st.session_state.X_test_selected = None
- st.session_state.y_train = None
- st.session_state.y_test = None
- st.session_state.model = None # Initialize model in session state
-
- # Target Variable Selection
- target_column = st.selectbox("Select Target Variable", df.columns, help="Choose the column to predict.")
-
- # Problem Type Selection
- problem_type = st.radio("Select Problem Type", ["Regression", "Classification"], help="Choose the type of problem.")
-
- # Feature Selection
- feature_columns = st.multiselect("Select Feature Columns", df.drop(columns=[target_column]).columns, help="Choose features for training.")
-
- # Model Selection - Dynamic based on Problem Type
- if problem_type == "Regression":
- model_options = ["Linear Regression", "Decision Tree", "Random Forest", "Gradient Boosting", "SVM"]
- else: # Classification
- model_options = ["Logistic Regression", "Decision Tree", "Random Forest", "Gradient Boosting", "SVM", "Naive Bayes", "KNN"]
-
- model_name = st.selectbox("Select Model", model_options, help="Choose a model.")
-
- feature_selection_method = st.selectbox("Feature Selection Method", ["None", "SelectKBest"])
-
- # Hyperparameter Tuning - Dynamic based on Model Selection
- param_grid = {} # Initialize to empty dictionary
-
- #Define different paramter values for the model so it works. This is not an optimized number
- #The goal is to make sure that all visualizations and graphs work as is.
+ st.title("๐ Model Training Studio")
+ st.markdown("""
+ **Train and Evaluate Machine Learning Models** with advanced hyperparameter tuning and performance tracking.
+ Choose from a wide range of algorithms and configurations.
+ """)
+
+ if 'cleaned_data' not in st.session_state or st.session_state.cleaned_data is None:
+ st.warning("Please clean your data in the Smart Cleaning section first.")
+ st.stop()
+
+ df = st.session_state.cleaned_data.copy()
+
+ # Target Variable Selection
+ st.subheader("๐ฏ Target Variable")
+ target_column = st.selectbox("Select Target Variable", df.columns, help="Choose the column to predict.")
+
+ # Problem Type Selection
+ st.subheader("๐ Problem Type")
+ problem_type = st.radio("Select Problem Type", ["Regression", "Classification"], help="Choose the type of machine learning problem.")
+
+ # Feature Selection
+ st.subheader("๐ง Feature Selection")
+ use_all_features = st.checkbox("Use All Features", value=True, help="Select to use all features for training. Deselect to manually choose features.")
+ if use_all_features:
+ feature_columns = df.drop(columns=[target_column]).columns.tolist()
+ else:
+ feature_columns = st.multiselect("Select Feature Columns", df.drop(columns=[target_column]).columns, help="Choose the features you want to use for prediction.")
+
+ # Model Selection
+ st.subheader("๐ค Model Selection")
+ if problem_type == "Regression":
+ model_options = ["Linear Regression", "Decision Tree", "Random Forest", "Gradient Boosting", "SVM", "Neural Network"]
+ else: # Classification
+ model_options = ["Logistic Regression", "Decision Tree", "Random Forest", "Gradient Boosting", "SVM", "Neural Network", "KNN", "Naive Bayes"]
+ model_name = st.selectbox("Select Model", model_options, help="Choose a model.")
+
+ # Hyperparameter Tuning
+ st.subheader("๐๏ธ Hyperparameter Tuning")
+ with st.expander("Configure Hyperparameters", expanded=True):
if model_name == "Random Forest":
- st.subheader("Random Forest Hyperparameters")
- param_grid = {
- 'n_estimators': list(range(100, 101)), #Used 100 so model is trained and not empty and all visuals work
-
- 'max_depth': list(range(10,11)), #default value 10 so its in model
- 'min_samples_split': list(range(2,3)), #New hyperparameter default 2
- 'min_samples_leaf': list(range(1,2)), #New hyperparameter default 1
+ n_estimators = st.slider("Number of Estimators", 10, 200, 100)
+ max_depth = st.slider("Max Depth", 3, 20, 10)
+ min_samples_split = st.slider("Min Samples Split", 2, 10, 2)
+ min_samples_leaf = st.slider("Min Samples Leaf", 1, 10, 1)
+ hyperparams = {
+ 'n_estimators': n_estimators,
+ 'max_depth': max_depth,
+ 'min_samples_split': min_samples_split,
+ 'min_samples_leaf': min_samples_leaf
}
-
elif model_name == "Gradient Boosting":
- st.subheader("Gradient Boosting Hyperparameters")
- param_grid = {
- 'n_estimators': list(range(100, 101)),
- 'learning_rate': [0.1],
- 'max_depth': list(range(3,4))
-
+ learning_rate = st.slider("Learning Rate", 0.01, 1.0, 0.1)
+ n_estimators = st.slider("Number of Estimators", 10, 200, 100)
+ max_depth = st.slider("Max Depth", 3, 20, 10)
+ hyperparams = {
+ 'learning_rate': learning_rate,
+ 'n_estimators': n_estimators,
+ 'max_depth': max_depth
}
-
- elif model_name == "Decision Tree":
- st.subheader("Decision Tree Hyperparameters")
- param_grid = {
- 'criterion': ["gini"],
- 'max_depth': list(range(3,4)),
+ elif model_name == "Neural Network":
+ hidden_layers = st.slider("Number of Hidden Layers", 1, 5, 2)
+ neurons_per_layer = st.slider("Neurons per Layer", 10, 200, 50)
+ epochs = st.slider("Epochs", 10, 200, 50)
+ batch_size = st.slider("Batch Size", 16, 128, 32)
+ hyperparams = {
+ 'hidden_layers': hidden_layers,
+ 'neurons_per_layer': neurons_per_layer,
+ 'epochs': epochs,
+ 'batch_size': batch_size
}
+ else:
+ hyperparams = {}
- # Train-Test Split
- test_size = st.slider("Test Size", 0.1, 0.5, 0.2, help="Proportion of the dataset to include in the test split.")
+ # Train-Test Split
+ st.subheader("โ๏ธ Train-Test Split")
+ test_size = st.slider("Test Size", 0.1, 0.5, 0.2, help="Proportion of the dataset to include in the test split.")
- if st.button("Train Model"):
- with st.spinner("Training model..."):
- try:
- X = df[feature_columns]
- y = df[target_column]
+ # Model Training
+ if st.button("๐ Train Model"):
+ with st.spinner("Training model..."):
+ try:
+ X = df[feature_columns]
+ y = df[target_column]
- # Check if X is empty
- if X.empty:
- st.error("No features were selected. Please select feature columns.")
- st.stop()
+ # Check if X is empty
+ if X.empty:
+ st.error("No features were selected. Please select feature columns.")
+ st.stop()
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
+ # Train-Test Split
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
- # Preprocessing Pipeline
- numeric_features = X.select_dtypes(include=np.number).columns
- categorical_features = X.select_dtypes(exclude=np.number).columns
+ # Preprocessing Pipeline
+ numeric_features = X.select_dtypes(include=np.number).columns
+ categorical_features = X.select_dtypes(exclude=np.number).columns
- numeric_transformer = Pipeline(steps=[
- ('imputer', SimpleImputer(strategy='median')),
- ('scaler', StandardScaler())
- ])
+ numeric_transformer = Pipeline(steps=[
+ ('imputer', SimpleImputer(strategy='median')),
+ ('scaler', StandardScaler())
+ ])
- categorical_transformer = Pipeline(steps=[
- ('imputer', SimpleImputer(strategy='most_frequent')),
- ('onehot', OneHotEncoder(handle_unknown='ignore'))
+ categorical_transformer = Pipeline(steps=[
+ ('imputer', SimpleImputer(strategy='most_frequent')),
+ ('onehot', OneHotEncoder(handle_unknown='ignore'))
+ ])
+
+ preprocessor = ColumnTransformer(
+ transformers=[
+ ('num', numeric_transformer, numeric_features),
+ ('cat', categorical_transformer, categorical_features)
])
- preprocessor = ColumnTransformer(
- transformers=[
- ('num', numeric_transformer, numeric_features),
- ('cat', categorical_transformer, categorical_features)
- ])
-
- X_train_processed = preprocessor.fit_transform(X_train)
- X_test_processed = preprocessor.transform(X_test)
-
- #Feature Selection
- if feature_selection_method == "SelectKBest":
- k = st.slider("Number of Features to Select", 1, len(feature_columns), len(feature_columns), key = "featureselector")
- selector = SelectKBest(k=k)
- X_train_selected = selector.fit_transform(X_train_processed, y_train)
- X_test_selected = selector.transform(X_test_processed)
- else:
- X_train_selected = X_train_processed
- X_test_selected = X_test_processed
-
- # Model Training and Hyperparameter Tuning
- if model_name == "Linear Regression":
- model = LinearRegression()
- model.fit(X_train_selected, y_train)
-
- elif model_name == "Logistic Regression":
- model = LogisticRegression(max_iter=1000)
- model.fit(X_train_selected, y_train)
- elif model_name == "Decision Tree":
- if problem_type == "Regression":
- model = DecisionTreeRegressor()
- model.fit(X_train_selected, y_train)
- else:
- model = DecisionTreeClassifier()
- model.fit(X_train_selected, y_train)
- elif model_name == "Random Forest":
- if problem_type == "Regression":
- model = RandomForestRegressor(random_state=42)
- if 'param_grid' in locals() and param_grid: #added param_grid not empty condition
- grid_search = GridSearchCV(model, param_grid, cv=3, scoring='neg_mean_squared_error') # Example scoring
- grid_search.fit(X_train_selected, y_train)
- model = grid_search.best_estimator_
- st.write("Best Parameters:", grid_search.best_params_)
- else:
- model = RandomForestRegressor(random_state=42) #define if no param_grid
- model.fit(X_train_selected, y_train) # fit without gridsearch if param_grid is not defined.
+ X_train_processed = preprocessor.fit_transform(X_train)
+ X_test_processed = preprocessor.transform(X_test)
- else:
- model = RandomForestClassifier(random_state=42)
- if 'param_grid' in locals()and param_grid: #added param_grid not empty condition
- grid_search = GridSearchCV(model, param_grid, cv=3, scoring='accuracy')
- grid_search.fit(X_train_selected, y_train)
- model = grid_search.best_estimator_
- st.write("Best Parameters:", grid_search.best_params_)
- else:
- model = RandomForestClassifier(random_state=42) #define if no param_grid
- model.fit(X_train_selected, y_train) # fit without gridsearch if param_grid is not defined
- elif model_name == "Gradient Boosting":
- from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier #moved import here to avoid bloat
- model = GradientBoostingRegressor() if problem_type == "Regression" else GradientBoostingClassifier()
- model.fit(X_train_selected, y_train)
- elif model_name == "SVM":
- model = SVR() if problem_type == "Regression" else SVC()
- model.fit(X_train_selected, y_train)
- elif model_name == "Naive Bayes":
- from sklearn.naive_bayes import GaussianNB
- model = GaussianNB()
- model.fit(X_train_selected, y_train)
- elif model_name == "KNN":
- from sklearn.neighbors import KNeighborsClassifier
- model = KNeighborsClassifier()
- model.fit(X_train_selected, y_train)
-
- # Store model and preprocessor
- st.session_state.model = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
- st.session_state.preprocessor = preprocessor
-
- #Store the test data
- st.session_state.X_train_selected = X_train_selected
- st.session_state.X_test_selected = X_test_selected
- st.session_state.y_train = y_train
- st.session_state.y_test = y_test
-
- # Model Evaluation
- y_pred = model.predict(X_test_selected)
+ # Model Training
+ if model_name == "Linear Regression":
+ model = LinearRegression()
+ elif model_name == "Logistic Regression":
+ model = LogisticRegression(max_iter=1000)
+ elif model_name == "Decision Tree":
if problem_type == "Regression":
- mse = mean_squared_error(y_test, y_pred)
- r2 = r2_score(y_test, y_pred)
- st.write(f"Mean Squared Error: {mse:.4f}")
- st.write(f"R-squared: {r2:.4f}")
+ model = DecisionTreeRegressor()
else:
- from sklearn.metrics import confusion_matrix, roc_curve, auc, precision_recall_curve, classification_report #Import here to avoid library bloat
- import seaborn as sns
- import matplotlib.pyplot as plt #Added import statement
- import numpy as np
- import pandas as pd
- from sklearn.model_selection import learning_curve, validation_curve
-
- #Weighted averaging for metrics for multiclass
- average_method = "weighted" #changed from None
-
- accuracy = accuracy_score(y_test, y_pred)
- precision = precision_score(y_test, y_pred, average = average_method, zero_division = 0)
- recall = recall_score(y_test, y_pred, average = average_method, zero_division = 0)
- f1 = f1_score(y_test, y_pred, average = average_method, zero_division = 0)
- st.write(f"Accuracy: {accuracy:.4f}")
- st.write(f"Precision: {precision:.4f}")
- st.write(f"Recall: {recall:.4f}")
- st.write(f"F1 Score: {f1:.4f}")
- st.write("Classification Report:")
- st.text(classification_report(y_test, y_pred, zero_division = 0))
-
-
- #Confusion Matrix
-
- conf_matrix = confusion_matrix(y_test, y_pred)
-
- #Heatmap
- fig_conf, ax_conf = plt.subplots()
- sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=ax_conf)
- ax_conf.set_xlabel('Predicted Labels')
- ax_conf.set_ylabel('True Labels')
- ax_conf.set_title('Confusion Matrix')
- st.pyplot(fig_conf)
-
-
- #Added section for model visualization
- st.subheader("Model Visualization")
- #Use conditional to make sure that everything only executes when the data set is trained and not outside of it.
- if st.session_state.model is not None: #Make sure that everything only executes when data set is trained and not outside of it.
- try: #All the plotting code here.
- if model_name in ["Random Forest", "Gradient Boosting"]:#Used list to define models.
- #Make sure you use this inside of a conditional for classification, model, and tree based model.
-
- #Feature Importance (Tree-based Models)
-
- importances = model.feature_importances_ # Assumed tree-based model
- feat_importances = pd.Series(importances, index=X_train.columns)
- feat_importances = feat_importances.nlargest(20)
-
- fig_feat, ax_feat = plt.subplots()
- feat_importances.plot(kind='barh', ax=ax_feat)
- ax_feat.set_xlabel('Relative Importance')
- ax_feat.set_ylabel('Features')
- ax_feat.set_title('Feature Importances')
- st.pyplot(fig_feat)
-
- #Create data that determines the learning and validation curve and what we have to add
- train_sizes, train_scores, valid_scores = learning_curve(model, X_train_selected, y_train, cv=5, scoring='accuracy' if problem_type =="Classification" else 'neg_mean_squared_error', n_jobs=-1) #Define cross validation for run
-
- #Then add a plot for the learning curve and use st.pyplot
- train_mean = np.mean(train_scores, axis=1)
- train_std = np.std(train_scores, axis=1)
- valid_mean = np.mean(valid_scores, axis=1)
- valid_std = np.std(valid_scores, axis=1)
-
- #Plot each of the variables that has to be used.
-
- fig_lc, ax_lc = plt.subplots()
- ax_lc.plot(train_sizes, train_mean, color='blue', marker='o', markersize=5, label='Training ' + ('Accuracy' if problem_type == "Classification" else "Neg MSE"))
- ax_lc.fill_between(train_sizes, train_mean + train_std, train_mean - train_std, alpha=0.15, color='blue')
- ax_lc.plot(train_sizes, valid_mean, color='green', linestyle='--', marker='s', markersize=5, label='Validation ' + ('Accuracy' if problem_type == "Classification" else "Neg MSE"))
- ax_lc.fill_between(train_sizes, valid_mean + valid_std, valid_mean - valid_std, alpha=0.15, color='green')
-
- ax_lc.set_title('Learning Curves')
- ax_lc.set_xlabel('Training Set Size')
- ax_lc.set_ylabel('Score')
- ax_lc.legend(loc='best')
- st.pyplot(fig_lc)
-
- except Exception as e: #Local error
- st.write(f"Visuals are only available for tree based models or if models are selected prior: {e}") #Write only if error
-
- except Exception as e:
- st.error(f"An error occurred: {e}")
-
- else:
- st.write("Please upload and clean data first.")
+ model = DecisionTreeClassifier()
+ elif model_name == "Random Forest":
+ if problem_type == "Regression":
+ model = RandomForestRegressor(**hyperparams)
+ else:
+ model = RandomForestClassifier(**hyperparams)
+ elif model_name == "Gradient Boosting":
+ if problem_type == "Regression":
+ model = GradientBoostingRegressor(**hyperparams)
+ else:
+ model = GradientBoostingClassifier(**hyperparams)
+ elif model_name == "SVM":
+ if problem_type == "Regression":
+ model = SVR()
+ else:
+ model = SVC()
+ elif model_name == "Neural Network":
+ if problem_type == "Regression":
+ model = MLPRegressor(
+ hidden_layer_sizes=[hyperparams['neurons_per_layer']] * hyperparams['hidden_layers'],
+ max_iter=hyperparams['epochs'],
+ batch_size=hyperparams['batch_size']
+ )
+ else:
+ model = MLPClassifier(
+ hidden_layer_sizes=[hyperparams['neurons_per_layer']] * hyperparams['hidden_layers'],
+ max_iter=hyperparams['epochs'],
+ batch_size=hyperparams['batch_size']
+ )
+ elif model_name == "KNN":
+ model = KNeighborsClassifier()
+ elif model_name == "Naive Bayes":
+ model = GaussianNB()
+
+ # Train the model
+ model.fit(X_train_processed, y_train)
+
+ # Store model and preprocessor
+ st.session_state.model = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
+ st.session_state.preprocessor = preprocessor
+
+ # Store the test data
+ st.session_state.X_train_selected = X_train_processed
+ st.session_state.X_test_selected = X_test_processed
+ st.session_state.y_train = y_train
+ st.session_state.y_test = y_test
+
+ # Model Evaluation
+ y_pred = model.predict(X_test_processed)
+ if problem_type == "Regression":
+ mse = mean_squared_error(y_test, y_pred)
+ rmse = np.sqrt(mse)
+ mae = mean_absolute_error(y_test, y_pred)
+ r2 = r2_score(y_test, y_pred)
+ st.write(f"Mean Squared Error: {mse:.4f}")
+ st.write(f"Root Mean Squared Error: {rmse:.4f}")
+ st.write(f"Mean Absolute Error: {mae:.4f}")
+ st.write(f"R-squared: {r2:.4f}")
+ else:
+ accuracy = accuracy_score(y_test, y_pred)
+ precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
+ recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
+ f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
+ st.write(f"Accuracy: {accuracy:.4f}")
+ st.write(f"Precision: {precision:.4f}")
+ st.write(f"Recall: {recall:.4f}")
+ st.write(f"F1 Score: {f1:.4f}")
+ st.write("Classification Report:")
+ st.text(classification_report(y_test, y_pred))
+
+ # Visualization
+ st.subheader("๐ Model Performance Visualization")
+ if problem_type == "Regression":
+ fig, ax = plt.subplots()
+ ax.scatter(y_test, y_pred)
+ ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=2)
+ ax.set_xlabel('Actual')
+ ax.set_ylabel('Predicted')
+ ax.set_title('Actual vs Predicted')
+ st.pyplot(fig)
+ else:
+ conf_matrix = confusion_matrix(y_test, y_pred)
+ fig, ax = plt.subplots()
+ sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=ax)
+ ax.set_xlabel('Predicted Labels')
+ ax.set_ylabel('True Labels')
+ ax.set_title('Confusion Matrix')
+ st.pyplot(fig)
+
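+ # Illustrative sketch: learning_curve (imported above) could be used here to
+ # inspect over/under-fitting, e.g. for the tree ensembles:
+ if model_name in ["Random Forest", "Gradient Boosting"]:
+ lc_scoring = 'accuracy' if problem_type == "Classification" else 'neg_mean_squared_error'
+ train_sizes, train_scores, valid_scores = learning_curve(model, X_train_processed, y_train, cv=5, scoring=lc_scoring, n_jobs=-1)
+ fig_lc, ax_lc = plt.subplots()
+ ax_lc.plot(train_sizes, train_scores.mean(axis=1), 'o-', label='Training score')
+ ax_lc.plot(train_sizes, valid_scores.mean(axis=1), 's--', label='Validation score')
+ ax_lc.set_xlabel('Training set size')
+ ax_lc.set_ylabel(lc_scoring)
+ ax_lc.legend(loc='best')
+ st.pyplot(fig_lc)
+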
+ st.success("Model trained successfully!")
+ except Exception as e:
+ st.error(f"An error occurred during training: {e}")
- # Model Saving
+ # Model Saving
+ if st.session_state.get("model") is not None:
+ st.subheader("๐พ Save Model")
model_filename = st.text_input("Enter Model Filename (without extension)", "trained_model")
if st.button("Save Model"):
try:
@@ -868,498 +864,579 @@ elif app_mode == "Model Training":
st.success(f"Model saved as {model_filename}.joblib")
except Exception as e:
st.error(f"Error saving model: {e}")
- # Model loading in a different section
- model_file = st.file_uploader("Upload Trained Model", type=["joblib"])
- if model_file is not None:
- try:
- st.session_state.model = joblib.load(model_file)
- st.success("Model loaded successfully!")
- except Exception as e:
- st.error(f"Error loading model: {e}")
- #Model Evaluation Section - run on the saved model
- if st.session_state.model is not None and st.session_state.X_test_selected is not None: # added check to make sure it is a loaded model
- try:
- y_pred = st.session_state.model.predict(st.session_state.X_test_selected) # load from stored
+# Visualization Lab Section
+elif app_mode == "Visualization Lab":
+ st.title("๐ฌ Visualization Lab")
+ st.markdown("""
+ **Explore and Visualize Your Data** with advanced plotting tools and interactive visualizations.
+ Uncover hidden patterns and relationships in your data.
+ """)
+
+ if 'cleaned_data' not in st.session_state or st.session_state.cleaned_data is None:
+ st.warning("Please clean your data in the Smart Cleaning section first.")
+ st.stop()
+
+ df = st.session_state.cleaned_data.copy()
+
+ # Visualization Type Selection
+ st.subheader("๐ Choose Visualization Type")
+ plot_types = [
+ "Histogram", "Scatter Plot", "Box Plot", "Violin Plot",
+ "Correlation Heatmap", "Parallel Coordinates", "Andrews Curves",
+ "Pair Plot", "Density Contour", "3D Scatter", "Time Series",
+ "Sunburst Chart", "Funnel Chart", "Clustering Analysis"
+ ]
+ plot_type = st.selectbox("Select Visualization Type", plot_types)
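+ # Note: only a subset of the listed plot types is implemented in the plot-generation block below;
+ # unimplemented selections fall through to an informational message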
+
+ # Dynamic Controls Based on Plot Type
+ if plot_type != "Correlation Heatmap":
+ x_col = st.selectbox("X Axis", df.columns)
+
+ if plot_type in ["Scatter Plot", "Box Plot", "Violin Plot", "Time Series", "3D Scatter", "Histogram"]:
+ y_col = st.selectbox("Y Axis", df.columns)
+
+ if plot_type == "3D Scatter":
+ z_col = st.selectbox("Z Axis", df.columns)
+ color_col = st.selectbox("Color by", [None] + list(df.columns))
+
+ # Advanced Plot Customization
+ with st.expander("๐จ Advanced Customization", expanded=False):
+ color_palette = st.selectbox("Color Palette", ["Viridis", "Plasma", "Magma", "Cividis", "RdBu", "Rainbow"])
+ hover_data_cols = st.multiselect("Hover Data", df.columns)
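+ # color_palette holds a Plotly continuous colorscale name; it is only applied where a continuous scale makes sense (e.g. the correlation heatmaps)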
+
+ # Plot Generation
+ try:
+ fig = None
- if problem_type == "Regression":
- mse = mean_squared_error(st.session_state.y_test, y_pred)
- r2 = r2_score(st.session_state.y_test, y_pred)
- st.write(f"Mean Squared Error: {mse:.4f}")
- st.write(f"R-squared: {r2:.4f}")
- else:
- from sklearn.metrics import confusion_matrix, roc_curve, auc, precision_recall_curve, classification_report #Import here to avoid library bloat
- accuracy = accuracy_score(st.session_state.y_test, y_pred)
- st.write(f"Accuracy: {accuracy:.4f}")
- except Exception as e: #local error
- st.error(f"An error occurred during model evaluation: {e}")
-
-elif app_mode == "Predictions":
- st.title("๐ฎ Make Predictions")
+ if plot_type == "Histogram":
+ fig = px.histogram(
+ df, x=x_col, y=y_col,
+ nbins=30, template="plotly_dark",
+ color_discrete_sequence=[color_palette]
+ )
- if st.session_state.model is not None and st.session_state.cleaned_data is not None:
- df = st.session_state.cleaned_data.copy()
- model = st.session_state.model.steps[-1][1] #Define model from the state
+ elif plot_type == "Scatter Plot":
+ fig = px.scatter(
+ df, x=x_col, y=y_col,
+ color=color_col,
+ size=hover_data_cols,
+ hover_data=hover_data_cols
+ )
- try:
- numeric_transformer_columns = st.session_state.model.steps[0][1].transformers_[0][2] if hasattr(st.session_state.model.steps[0][1].transformers_[0][2], '__len__') else []
- categorical_transformer_columns = st.session_state.model.steps[0][1].transformers_[1][2] if hasattr(st.session_state.model.steps[0][1].transformers_[1][2], '__len__') else []
- model_columns = numeric_transformer_columns + categorical_transformer_columns
- except AttributeError as e:
- st.error(f"Error accessing model transformers: {e}. Please ensure a valid model is trained and loaded.")
- st.stop()
+ elif plot_type == "3D Scatter":
+ fig = px.scatter_3d(
+ df, x=x_col, y=y_col, z=z_col,
+ color=color_col,
+ color_discrete_sequence=[color_palette]
+ )
- model_is_classification = hasattr(model, 'predict_proba') # Check for classification or other problem
- if not set(model_columns).issubset(set(df.columns)): #Fixed comparison
- st.error("The model was trained on a dataframe that contains different columns than the currently uploaded dataframe. Please upload the correct dataframe.")
- st.stop()
-
- input_data = {}
- st.subheader("Enter Data for Prediction")
- for col in model_columns:
- if pd.api.types.is_numeric_dtype(df[col]):
- input_data[col] = st.number_input(f"Enter {col}", value=df[col].mean())
+ elif plot_type == "Correlation Heatmap":
+ numeric_df = df.select_dtypes(include=np.number)
+ if not numeric_df.empty:
+ corr = numeric_df.corr()
+ fig = px.imshow(
+ corr, text_auto=True,
+ color_continuous_scale=color_palette
+ )
else:
- input_data[col] = st.selectbox(f"Select {col}", df[col].unique())
-
- # Prediction Button
- if st.button("Make Prediction"):
- try:
- input_df = pd.DataFrame([input_data])
- #Preprocess for model
- input_processed = st.session_state.preprocessor.transform(input_df)
- prediction = st.session_state.model.predict(input_processed)[0]
- st.subheader("Prediction Result")
- st.write(f"The predicted value is: {prediction}")
-
- # Show shap values chart
- show_shap_values = st.checkbox("View SHAP Explanation") #select model to show shap values
-
-
- if show_shap_values and model_is_classification and model_name not in ["Linear Regression","Logistic Regression","SVM","Naive Bayes", "KNN"]:#Show shap values if this can perform.
-
- try:
- import shap #Import lib
- explainer = shap.TreeExplainer(st.session_state.model.steps[-1][1]) #Used tree model because these are easily visualized
-
- shap_values = explainer.shap_values(input_processed) #Get output of each values, only used in tree models
-
- st.subheader("SHAP Values")
- #Plot for each of the different class labels.
-
- shap.initjs()
- fig_shap, ax_shap = plt.subplots(1, figsize = (10,10))
- shap.summary_plot(shap_values, features = input_processed, feature_names = model_columns, plot_type = "bar")#plot for multi class labels
- st.pyplot(fig_shap) #Show the figure
- except Exception as e:
- st.write(f"Can show shap values on tree based model: {e}") #Show error
- # Additional Feedback (Example for Classification)
- if hasattr(st.session_state.model.steps[-1][1], 'predict_proba'): #If the end variable has predict_proba and is therefore a predictor
- probabilities = st.session_state.model.predict_proba(input_processed)[0]
- st.write("Predicted Probabilities:")
- st.write(probabilities) #write here
- except Exception as e:
- st.error(f"An error occurred during prediction: {e}")
-
- #Add batch prediction section in prediction tab
- st.subheader("Batch Predictions")
- batch_file = st.file_uploader("Upload CSV for Batch Predictions", type=["csv"])
- if batch_file is not None:
- try:
- batch_df = pd.read_csv(batch_file)
- #Verify data types and if it matches the ones used during the columns
- for col in model_columns:
- if pd.api.types.is_numeric_dtype(df[col]):
- try:
- batch_df[col] = pd.to_numeric(batch_df[col], errors='raise')
- except ValueError:
- st.error(f"Column '{col}' must be numeric.")
- st.stop()
- else:
- #ensure columns are type string if that isnt the case
- batch_df[col] = batch_df[col].astype(str)
+ st.warning("No numerical columns found for correlation heatmap.")
- if not set(model_columns).issubset(set(batch_df.columns)): #Fixed comparison
- st.error("The batch dataframe that contains different columns than the currently used training dataframe. Please upload the correct dataframe.")
- st.stop()
-
- # Preprocess the batch data
- batch_processed = st.session_state.preprocessor.transform(batch_df[model_columns])
- # Make predictions
- batch_predictions = st.session_state.model.predict(batch_processed)
- batch_df['Prediction'] = batch_predictions
+ elif plot_type == "Box Plot":
+ fig = px.box(
+ df, x=x_col, y=y_col,
+ color=color_col
+ )
- #Add probability output if that function is available.
- if hasattr(st.session_state.model.steps[-1][1], 'predict_proba'):
- batch_probabilities = st.session_state.model.predict_proba(batch_processed)
- for i in range(batch_probabilities.shape[1]): #Loop through and give each probability
- batch_df[f'Probability_Class_{i}'] = batch_probabilities[:, i]
+ elif plot_type == "Violin Plot":
+ fig = px.violin(
+ df, x=x_col, y=y_col,
+ box=True, points="all",
+ color=color_col
+ )
+ elif plot_type == "Time Series":
+ df = df.sort_values(by=x_col)
+ fig = px.line(
+ df, x=x_col, y=y_col,
+ color=color_col
+ )
+ elif plot_type == "Scatter Matrix":
+ fig = px.scatter_matrix(
+ df, dimensions=[x_col, y_col],
+ color=color_col
+ )
- st.dataframe(batch_df)
+ if fig:
+ st.plotly_chart(fig, use_container_width=True)
+ else:
+ st.info(f"No figure was generated for '{plot_type}'. This plot type may not be implemented yet.")
+ except Exception as e:
+ st.error(f"An error occurred while generating the plot: {e}")
+
+ # Statistical Analysis Section
+ with st.expander("๐ Statistical Analysis", expanded=True):
+ analysis_type = st.selectbox("Select Analysis Type", [
+ "Descriptive Statistics",
+ "Correlation Analysis",
+ "Hypothesis Testing",
+ "Distribution Fitting"
+ ])
- # Download predictions
- csv = batch_df.to_csv(index=False)
- b64 = base64.b64encode(csv.encode()).decode() # some strings
- href = f'Download Predictions CSV'
- st.markdown(href, unsafe_allow_html=True)
+ if analysis_type == "Descriptive Statistics":
+ st.write(df.describe(include='all'))
- except Exception as e:
- st.error(f"Error processing batch file: {e}")
+ elif analysis_type == "Correlation Analysis":
+ numeric_cols = df.select_dtypes(include=np.number).columns
+ if len(numeric_cols) >= 2:
+ corr_method = st.selectbox("Correlation Method", [
+ "Pearson", "Kendall", "Spearman"
+ ])
+ corr_matrix = df[numeric_cols].corr(method=corr_method.lower())
+ st.write(corr_matrix)
+ fig_corr = px.imshow(corr_matrix, text_auto=True, color_continuous_scale=color_palette)
+ st.plotly_chart(fig_corr, use_container_width=True)
+ else:
+ st.warning("Need at least 2 numeric columns for correlation analysis")
+ elif analysis_type == "Hypothesis Testing":
+ test_type = st.selectbox("Select Test Type", [
+ "T-test", "Chi-Squared Test", "ANOVA", "Mann-Whitney U"
+ ])
+ if test_type == "T-test":
+ col1 = st.selectbox("Column 1 (Numeric)", df.select_dtypes(include=np.number).columns)
+ col2 = st.selectbox("Column 2 (Categorical)", df.select_dtypes(include='object').columns)
+ if st.button("Run T-test"):
+ from scipy import stats  # imported locally; the module-level scipy.stats import was removed
+ groups = df.groupby(col2)[col1].apply(list)
+ if len(groups) == 2:
+ t_stat, p_value = stats.ttest_ind(groups.iloc[0], groups.iloc[1])
+ st.write(f"T-statistic: {t_stat:.4f}")
+ st.write(f"P-value: {p_value:.4f}")
+ if p_value < 0.05:
+ st.write("Reject the null hypothesis.")
+ else:
+ st.write("Fail to reject the null hypothesis.")
+ else:
+ st.write("Select a categorical column with exactly two categories.")
+
+ elif analysis_type == "Distribution Fitting":
+ numeric_col = st.selectbox("Select Numeric Column", df.select_dtypes(include=np.number).columns)
+ dist_types = ["Normal", "Log-Normal", "Exponential", "Gamma"]
+ selected_dist = st.selectbox("Select Distribution Type", dist_types)
+ if st.button("Fit Distribution"):
+ from scipy.stats import norm, lognorm, expon, gamma
+ dist_functions = {
+ "Normal": norm,
+ "Log-Normal": lognorm,
+ "Exponential": expon,
+ "Gamma": gamma
+ }
+ params = dist_functions[selected_dist].fit(df[numeric_col].dropna())
+ st.write(f"Fitted Parameters: {params}")
+
+ # Data Profiling Section
+ with st.expander("๐ Generate Full Data Profile", expanded=False):
+ if st.button("๐ Generate Comprehensive Report"):
+ with st.spinner("Generating report..."):
+ pr = ProfileReport(df, explorative=True)
+ st_profile_report(pr)
+
+# Insights Section
+elif app_mode == "Insights":
+ st.title("๐ Model Insights & Explainability")
+ st.markdown("""
+ **Understand and Interpret Your Model** with advanced explainability tools and visualizations.
+ Gain deeper insights into model behavior and predictions.
+ """)
+
+ if 'model' not in st.session_state or st.session_state.model is None:
+ st.warning("Please train a model in the Model Training section first.")
+ st.stop()
+
+ model = st.session_state.model.steps[-1][1] # Get the trained model
+ preprocessor = st.session_state.model.steps[0][1] # Get the preprocessor
+
+ # Model Summary
+ st.subheader("๐ Model Summary")
+ st.write(f"**Model Type:** {type(model).__name__}")
+ st.write(f"**Problem Type:** {'Regression' if hasattr(model, 'predict') else 'Classification'}")
+ st.write(f"**Training Date:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+
+ # Feature Importance
+ st.subheader("๐ Feature Importance")
+ if hasattr(model, 'feature_importances_'):
+ importances = model.feature_importances_
+ feature_names = preprocessor.get_feature_names_out()
+ importance_df = pd.DataFrame({
+ 'Feature': feature_names,
+ 'Importance': importances
+ }).sort_values('Importance', ascending=False)
+
+ import seaborn as sns  # imported locally; the module-level seaborn import was removed
+ fig, ax = plt.subplots()
+ sns.barplot(x='Importance', y='Feature', data=importance_df.head(10), ax=ax)
+ ax.set_title('Top 10 Feature Importances')
+ st.pyplot(fig)
else:
- st.write("Please train a model first in the 'Model Training' section.")
+ st.info("Feature importance not available for this model type.")
+
+ # SHAP Values
+ st.subheader("๐ SHAP Values")
+ if st.checkbox("Calculate SHAP Values (Warning: May be slow for large datasets)"):
+ try:
+ import shap
+ explainer = shap.TreeExplainer(model)
+ shap_values = explainer.shap_values(st.session_state.X_test_selected)
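+ # For multiclass classifiers, shap_values is a list with one array per class; summary_plot handles lists,
+ # but the force plot below assumes a single array (regression or binary output)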
+
+ # Summary Plot
+ st.write("### Summary Plot")
+ fig, ax = plt.subplots()
+ shap.summary_plot(shap_values, st.session_state.X_test_selected, feature_names=preprocessor.get_feature_names_out())
+ st.pyplot(fig)
+
+ # Force Plot for Individual Predictions
+ st.write("### Individual Prediction Explanation")
+ sample_idx = st.slider("Select Sample Index", 0, len(st.session_state.X_test_selected)-1, 0)
+ fig, ax = plt.subplots()
+ shap.force_plot(explainer.expected_value, shap_values[sample_idx], st.session_state.X_test_selected[sample_idx],
+ feature_names=preprocessor.get_feature_names_out(), matplotlib=True, show=False)
+ st.pyplot(fig)
+ except Exception as e:
+ st.error(f"SHAP calculation failed: {e}")
+
+ # Partial Dependence Plots
+ st.subheader("๐ Partial Dependence Plots")
+ if hasattr(model, 'predict'):
+ feature_to_plot = st.selectbox("Select Feature for PDP", preprocessor.get_feature_names_out())
+ if st.button("Generate PDP"):
+ from sklearn.inspection import PartialDependenceDisplay
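+ # Note: if the preprocessor produced a sparse matrix, from_estimator may need a dense array (e.g. X_test_selected.toarray())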
+ fig, ax = plt.subplots()
+ PartialDependenceDisplay.from_estimator(
+ model, st.session_state.X_test_selected,
+ features=[feature_to_plot],
+ feature_names=preprocessor.get_feature_names_out(),
+ ax=ax
+ )
+ st.pyplot(fig)
+
+ # Model Performance Over Time
+ st.subheader("โณ Model Performance Over Time")
+ if st.checkbox("Track Performance Over Time"):
+ performance_history = {
+ 'timestamp': [],
+ 'metric': [],
+ 'value': []
+ }
+
+ y_pred = model.predict(st.session_state.X_test_selected)
+ if hasattr(model, 'predict_proba'):
+ metric_name = 'Accuracy'
+ metric_value = accuracy_score(st.session_state.y_test, y_pred)
+ else:
+ metric_name = 'MSE'
+ metric_value = mean_squared_error(st.session_state.y_test, y_pred)
+ performance_history['timestamp'].append(datetime.now())
+ performance_history['metric'].append(metric_name)
+ performance_history['value'].append(metric_value)
+
+ performance_df = pd.DataFrame(performance_history)
+ st.line_chart(performance_df.set_index('timestamp')['value'])
+
+ # Model Debugging
+ st.subheader("๐ Model Debugging")
+ if st.checkbox("Enable Debug Mode"):
+ st.write("### Model Parameters")
+ st.json(model.get_params())
+
+ st.write("### Training Data Summary")
+ st.write(f"Number of Samples: {st.session_state.X_train_selected.shape[0]}")
+ st.write(f"Number of Features: {st.session_state.X_train_selected.shape[1]}")
+
+ # Export Insights
+ st.subheader("๐พ Export Insights")
+ if st.button("Export Insights as PDF"):
+ try:
+ from fpdf import FPDF
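+ # Requires the fpdf (or fpdf2) package to be installed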
+ pdf = FPDF()
+ pdf.add_page()
+ pdf.set_font("Arial", size=12)
+ pdf.cell(200, 10, txt="Model Insights Report", ln=True, align='C')
+ pdf.cell(200, 10, txt=f"Model Type: {type(model).__name__}", ln=True)
+ pdf.cell(200, 10, txt=f"Problem Type: {'Regression' if hasattr(model, 'predict') else 'Classification'}", ln=True)
+ pdf.output("model_insights.pdf")
+ st.success("Insights exported successfully!")
+ except Exception as e:
+ st.error(f"Export failed: {e}")
+# Predictions Section
+elif app_mode == "Predictions":
+ st.title("๐ฎ Prediction Studio")
+ st.markdown("""
+ **Make Predictions** with your trained model and explore prediction explanations.
+ Generate batch predictions and export results.
+ """)
+
+ if 'model' not in st.session_state or st.session_state.model is None:
+ st.warning("Please train a model in the Model Training section first.")
+ st.stop()
+
+ model = st.session_state.model.steps[-1][1] # Get the trained model
+ preprocessor = st.session_state.model.steps[0][1] # Get the preprocessor
+
+ # Single Prediction
+ st.subheader("๐ฏ Single Prediction")
+ input_data = {}
+ raw_features = preprocessor.feature_names_in_  # raw columns the ColumnTransformer was fitted on
+ feature_names = preprocessor.get_feature_names_out()  # transformed names, used for the SHAP plot below
+ for feature in raw_features:
+ if feature in st.session_state.cleaned_data.columns:
+ if pd.api.types.is_numeric_dtype(st.session_state.cleaned_data[feature]):
+ input_data[feature] = st.number_input(f"Enter {feature}", value=st.session_state.cleaned_data[feature].mean())
+ else:
+ input_data[feature] = st.selectbox(f"Select {feature}", st.session_state.cleaned_data[feature].unique())
-elif app_mode == "Visualization Lab":
- st.title("๐ฌ Advanced Data Visualization and Clustering Lab")
+ if st.button("Make Prediction"):
+ try:
+ input_df = pd.DataFrame([input_data])
+ input_processed = preprocessor.transform(input_df)
+ prediction = model.predict(input_processed)[0]
+
+ st.write(f"**Prediction:** {prediction}")
+
+ if hasattr(model, 'predict_proba'):
+ probabilities = model.predict_proba(input_processed)[0]
+ st.write("**Prediction Probabilities:**")
+ st.bar_chart(probabilities)
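+ # To label the bars with class names, the fitted classifier's classes_ attribute can be used,
+ # e.g. st.bar_chart(pd.Series(probabilities, index=model.classes_))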
- # Initialize session state for cleaned data
- if 'cleaned_data' not in st.session_state:
- st.session_state.cleaned_data = None
+ # SHAP Explanation
+ if st.checkbox("Show SHAP Explanation"):
+ try:
+ import shap
+ explainer = shap.TreeExplainer(model)
+ shap_values = explainer.shap_values(input_processed)
+
+ st.write("### SHAP Values")
+ fig, ax = plt.subplots()
+ shap.force_plot(explainer.expected_value, shap_values, input_processed,
+ feature_names=feature_names, matplotlib=True, show=False)
+ st.pyplot(fig)
+ except Exception as e:
+ st.error(f"SHAP calculation failed: {e}")
- # Sample data upload (replace with your data loading logic)
- uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
- if uploaded_file is not None:
- try:
- df = pd.read_csv(uploaded_file)
- st.session_state.cleaned_data = df
- st.success("Data loaded successfully!")
except Exception as e:
- st.error(f"Error loading data: {e}")
+ st.error(f"Prediction failed: {e}")
- if st.session_state.cleaned_data is not None:
- df = st.session_state.cleaned_data.copy()
-
- # Visualization Type Selection
- visualization_type = st.selectbox("Select Visualization Type", [
- "Pair Plot", "Parallel Coordinates Plot", "Andrews Curves", "Pie Chart",
- "Area Chart", "Density Contour", "Sunburst Chart", "Funnel Chart", "Clustering Analysis"
- ])
+ # Batch Predictions
+ st.subheader("๐ Batch Predictions")
+ batch_file = st.file_uploader("Upload CSV for Batch Predictions", type=["csv"])
+ if batch_file is not None:
+ try:
+ batch_df = pd.read_csv(batch_file)
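+ # The uploaded CSV must contain the raw feature columns the preprocessor was fitted on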
+ batch_processed = preprocessor.transform(batch_df)
+ batch_predictions = model.predict(batch_processed)
+ batch_df['Prediction'] = batch_predictions
- if visualization_type == "Pair Plot":
- st.subheader("Pair Plot")
- cols_for_pairplot = st.multiselect("Select Columns for Pair Plot", df.select_dtypes(include=np.number).columns.tolist(), default=df.select_dtypes(include=np.number).columns.tolist()[:3])
- if cols_for_pairplot:
- fig = px.scatter_matrix(df, dimensions=cols_for_pairplot)
- st.plotly_chart(fig, use_container_width=True)
-
- elif visualization_type == "Parallel Coordinates Plot":
- st.subheader("Parallel Coordinates Plot")
- cols_for_parallel = st.multiselect("Select Columns for Parallel Coordinates", df.select_dtypes(include=np.number).columns.tolist(), default=df.select_dtypes(include=np.number).columns.tolist()[:5])
- if cols_for_parallel:
- fig = px.parallel_coordinates(df[cols_for_parallel], color=df[cols_for_parallel[0]] if cols_for_parallel else None)
- st.plotly_chart(fig, use_container_width=True)
-
- elif visualization_type == "Andrews Curves":
- st.subheader("Andrews Curves")
- cols_for_andrews = st.multiselect("Select Columns for Andrews Curves", df.select_dtypes(include=np.number).columns.tolist(), default=df.select_dtypes(include=np.number).columns.tolist()[:5])
- if cols_for_andrews:
- fig = px.andrews_curves(df[cols_for_andrews + [df.columns[0]]], class_column=df.columns[0])
- st.plotly_chart(fig, use_container_width=True)
-
- elif visualization_type == "Pie Chart":
- st.subheader("Pie Chart")
- col_for_pie = st.selectbox("Select Column for Pie Chart", df.columns)
- fig = px.pie(df, names=col_for_pie)
- st.plotly_chart(fig, use_container_width=True)
+ if hasattr(model, 'predict_proba'):
+ probabilities = model.predict_proba(batch_processed)
+ for i in range(probabilities.shape[1]):
+ batch_df[f'Probability_Class_{i}'] = probabilities[:, i]
- elif visualization_type == "Area Chart":
- st.subheader("Area Chart")
- cols_for_area = st.multiselect("Select Columns for Area Chart", df.select_dtypes(include=np.number).columns.tolist(), default=df.select_dtypes(include=np.number).columns.tolist()[:3])
- if cols_for_area:
- fig = px.area(df[cols_for_area])
- st.plotly_chart(fig, use_container_width=True)
-
- elif visualization_type == "Density Contour":
- st.subheader("Density Contour")
- x_col = st.selectbox("Select X Column for Density Contour", df.select_dtypes(include=np.number).columns.tolist())
- y_col = st.selectbox("Select Y Column for Density Contour", df.select_dtypes(include=np.number).columns.tolist())
- fig = px.density_contour(df, x=x_col, y=y_col)
- st.plotly_chart(fig, use_container_width=True)
+ st.write("### Predictions Preview")
+ st.dataframe(batch_df.head())
- elif visualization_type == "Sunburst Chart":
- st.subheader("Sunburst Chart")
- path_cols = st.multiselect("Select Path Columns for Sunburst Chart", df.columns)
- if path_cols:
- fig = px.sunburst(df, path=path_cols)
- st.plotly_chart(fig, use_container_width=True)
-
- elif visualization_type == "Funnel Chart":
- st.subheader("Funnel Chart")
- x_col = st.selectbox("Select X Column for Funnel Chart (Values)", df.select_dtypes(include=np.number).columns.tolist())
- y_col = st.selectbox("Select Y Column for Funnel Chart (Categories)", df.columns)
- fig = px.funnel(df, x=x_col, y=y_col)
- st.plotly_chart(fig, use_container_width=True)
+ # Download Predictions
+ csv = batch_df.to_csv(index=False)
+ b64 = base64.b64encode(csv.encode()).decode()
+ href = f'<a href="data:file/csv;base64,{b64}" download="predictions.csv">Download Predictions CSV</a>'
+ st.markdown(href, unsafe_allow_html=True)
- elif visualization_type == "Clustering Analysis":
- st.subheader("Clustering Analysis")
- numerical_cols = df.select_dtypes(include=np.number).columns.tolist()
+ except Exception as e:
+ st.error(f"Batch prediction failed: {e}")
- if not numerical_cols:
- st.warning("No numerical columns found for clustering.")
+ # Prediction Analysis
+ st.subheader("๐ Prediction Analysis")
+ if st.checkbox("Analyze Predictions"):
+ try:
+ y_pred = model.predict(st.session_state.X_test_selected)
+ y_test = st.session_state.y_test
+
+ if not hasattr(model, 'predict_proba'):
+ fig, ax = plt.subplots()
+ ax.scatter(y_test, y_pred)
+ ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=2)
+ ax.set_xlabel('Actual')
+ ax.set_ylabel('Predicted')
+ ax.set_title('Actual vs Predicted')
+ st.pyplot(fig)
else:
- cluster_cols = st.multiselect("Select Columns for Clustering", numerical_cols, default=numerical_cols[:2] if len(numerical_cols) >= 2 else numerical_cols)
-
- if cluster_cols:
- try:
- scaler = StandardScaler()
- scaled_data = scaler.fit_transform(df[cluster_cols])
- n_clusters = st.slider("Number of Clusters", 2, 10, 3, help="Number of clusters to form.")
- kmeans = KMeans(n_clusters=n_clusters, random_state=42)
- clusters = kmeans.fit_predict(scaled_data)
- df['Cluster'] = clusters
-
- if len(cluster_cols) == 2:
- fig = px.scatter(df, x=cluster_cols[0], y=cluster_cols[1], color='Cluster', title="K-Means Clustering")
- st.plotly_chart(fig, use_container_width=True)
- elif len(cluster_cols) == 3:
- fig = px.scatter_3d(df, x=cluster_cols[0], y=cluster_cols[1], z=cluster_cols[2], color='Cluster', title="K-Means Clustering (3D)")
- st.plotly_chart(fig, use_container_width=True)
- else:
- st.write("Clustering visualization is only supported for 2 or 3 selected columns.")
- st.success("Clustering applied successfully!")
-
- #Add clustering performance in clustering analysis
- if len(cluster_cols) >= 2: # Evaluate Silhouette Score
- try:
- silhouette_avg = silhouette_score(scaled_data, clusters)
- st.write(f"Silhouette Score: {silhouette_avg:.4f}")
- except:
- st.write("Could not compute silhouette score")
-
- #Add dimensionality reduction option and 2d/3d plots
-
- dimension_reduction = st.selectbox("Dimensionality Reduction", ["None", "PCA"])
- if dimension_reduction == "PCA":
- n_components = st.slider("Number of Components", 2, min(3, len(cluster_cols)), 2)
- pca = PCA(n_components=n_components)
- principal_components = pca.fit_transform(scaled_data)
- pca_df = pd.DataFrame(data=principal_components, columns=[f'PC{i + 1}' for i in range(n_components)])
- pca_df['Cluster'] = clusters # Add Cluster
-
- if len(cluster_cols) >= 2: #plotting section
- fig = None #Initialize fig
- if dimension_reduction == "None":
- if len(cluster_cols) == 2:
- fig = px.scatter(df, x=cluster_cols[0], y=cluster_cols[1], color='Cluster', title="K-Means Clustering")
- st.plotly_chart(fig, use_container_width=True)
- elif len(cluster_cols) == 3:
- fig = px.scatter_3d(df, x=cluster_cols[0], y=cluster_cols[1], z=cluster_cols[2], color='Cluster', title="K-Means Clustering (3D)")
- st.plotly_chart(fig, use_container_width=True)
- else:
- st.write("Clustering visualization is only supported for 2 or 3 selected columns.")
-
- elif dimension_reduction == "PCA":
- if n_components == 2:
- fig = px.scatter(pca_df, x='PC1', y='PC2', color='Cluster', title="K-Means Clustering (PCA - 2D)")
- st.plotly_chart(fig, use_container_width=True)
- elif n_components == 3:
- fig = px.scatter_3d(pca_df, x='PC1', y='PC2', z='PC3', color='Cluster', title="K-Means Clustering (PCA - 3D)")
- st.plotly_chart(fig, use_container_width=True)
-
- else:
- st.write("PCA visualization is only supported for 2 or 3 components.")
-
- except Exception as e:
- st.error(f"An error occurred during clustering: {e}")
+ from sklearn.metrics import confusion_matrix  # local import; not in the top-level metrics import
+ import seaborn as sns  # imported locally; the module-level seaborn import was removed
+ conf_matrix = confusion_matrix(y_test, y_pred)
+ fig, ax = plt.subplots()
+ sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=ax)
+ ax.set_xlabel('Predicted Labels')
+ ax.set_ylabel('True Labels')
+ ax.set_title('Confusion Matrix')
+ st.pyplot(fig)
+ except Exception as e:
+ st.error(f"Prediction analysis failed: {e}")
+ # Prediction Export
+ st.subheader("๐พ Export Predictions")
+ if st.button("Export Predictions as PDF"):
+ try:
+ from fpdf import FPDF
+ pdf = FPDF()
+ pdf.add_page()
+ pdf.set_font("Arial", size=12)
+ pdf.cell(200, 10, txt="Predictions Report", ln=True, align='C')
+ pdf.cell(200, 10, txt=f"Model Type: {type(model).__name__}", ln=True)
+ pdf.cell(200, 10, txt=f"Problem Type: {'Regression' if hasattr(model, 'predict') else 'Classification'}", ln=True)
+ pdf.output("predictions_report.pdf")
+ st.success("Predictions exported successfully!")
+ except Exception as e:
+ st.error(f"Export failed: {e}")
+
+# Neural Network Studio Section
elif app_mode == "Neural Network Studio":
st.title("๐ง Neural Network Studio")
+ st.markdown("""
+ **Build and Train Neural Networks** with advanced configurations and visualizations.
+ Explore deep learning models with ease.
+ """)
+
+ if 'cleaned_data' not in st.session_state or st.session_state.cleaned_data is None:
+ st.warning("Please clean your data in the Smart Cleaning section first.")
+ st.stop()
+
+ df = st.session_state.cleaned_data.copy()
+
+ # Target Variable Selection
+ st.subheader("๐ฏ Target Variable")
+ target_column = st.selectbox("Select Target Variable", df.columns, help="Choose the column to predict.")
+
+ # Problem Type Selection
+ st.subheader("๐ Problem Type")
+ problem_type = st.radio("Select Problem Type", ["Regression", "Classification"], help="Choose the type of machine learning problem.")
+
+ # Feature Selection
+ st.subheader("๐ง Feature Selection")
+ use_all_features = st.checkbox("Use All Features", value=True, help="Select to use all features for training. Deselect to manually choose features.")
+ if use_all_features:
+ feature_columns = df.drop(columns=[target_column]).columns.tolist()
+ else:
+ feature_columns = st.multiselect("Select Feature Columns", df.drop(columns=[target_column]).columns, help="Choose the features you want to use for prediction.")
+
+ # Neural Network Configuration
+ st.subheader("โ๏ธ Neural Network Configuration")
+ with st.expander("Configure Neural Network", expanded=True):
+ hidden_layers = st.slider("Number of Hidden Layers", 1, 5, 2)
+ neurons_per_layer = st.slider("Neurons per Layer", 10, 200, 50)
+ activation = st.selectbox("Activation Function", ["relu", "tanh", "sigmoid"])
+ learning_rate = st.slider("Learning Rate", 0.001, 0.1, 0.01)
+ epochs = st.slider("Epochs", 10, 200, 50)
+ batch_size = st.slider("Batch Size", 16, 128, 32)
+
+ # Train-Test Split
+ st.subheader("โ๏ธ Train-Test Split")
+ test_size = st.slider("Test Size", 0.1, 0.5, 0.2, help="Proportion of the dataset to include in the test split.")
+
+ # Model Training
+ if st.button("๐ Train Neural Network"):
+ with st.spinner("Training neural network..."):
+ try:
+ X = df[feature_columns]
+ y = df[target_column]
- if st.session_state.cleaned_data is not None:
- df = st.session_state.cleaned_data.copy()
+ # Train-Test Split
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
- # Target Variable Selection
- target_column = st.selectbox("Select Target Variable", df.columns, help="Choose the column you want to predict.")
+ # Preprocessing Pipeline
+ numeric_features = X.select_dtypes(include=np.number).columns
+ categorical_features = X.select_dtypes(exclude=np.number).columns
- # Problem Type Selection
- problem_type = st.radio("Select Problem Type", ["Regression", "Classification"], help="Choose the type of machine learning problem.")
+ numeric_transformer = Pipeline(steps=[
+ ('imputer', SimpleImputer(strategy='median')),
+ ('scaler', StandardScaler())
+ ])
- # Feature Selection (optional)
- use_all_features = st.checkbox("Use All Features", value=True, help="Select to use all features for training. Deselect to manually choose features.")
- if use_all_features:
- feature_columns = df.drop(columns=[target_column]).columns.tolist()
- else:
- feature_columns = st.multiselect("Select Feature Columns", df.drop(columns=[target_column]).columns, help="Choose the features you want to use for prediction.")
-
- # Model Selection
- model_type = st.selectbox("Select Neural Network Model", [
- "Simple Neural Network", "Convolutional Neural Network (CNN)", "Recurrent Neural Network (RNN)"
- ], help="Choose the neural network model to use.")
-
- # Hyperparameter Tuning
- with st.expander("Hyperparameter Tuning", expanded=False):
- if model_type == "Simple Neural Network":
- hidden_layers = st.slider("Number of Hidden Layers", 1, 5, 2, help="Number of hidden layers in the network.")
- neurons_per_layer = st.slider("Neurons per Layer", 10, 200, 50, help="Number of neurons in each hidden layer.")
- epochs = st.slider("Epochs", 10, 200, 50, help="Number of epochs for training.")
- batch_size = st.slider("Batch Size", 16, 128, 32, help="Batch size for training.")
- elif model_type == "Convolutional Neural Network (CNN)":
- epochs_cnn = st.slider("Epochs", 10, 200, 50, help="Number of epochs for CNN training.")
- batch_size_cnn = st.slider("Batch Size", 16, 128, 32, help="Batch size for CNN training.")
- elif model_type == "Recurrent Neural Network (RNN)":
- epochs_rnn = st.slider("Epochs", 10, 200, 50, help="Number of epochs for RNN training.")
- batch_size_rnn = st.slider("Batch Size", 16, 128, 32, help="Batch size for RNN training.")
- sequence_length = st.slider("Sequence Length (for RNN)", 10, 100, 30, help="Length of the input sequences for RNN.")
- # Train-Test Split
- test_size = st.slider("Test Size", 0.1, 0.5, 0.2, help="Proportion of the data to use for testing.")
-
- # Model Training Button
- if st.button("Train Neural Network Model"):
- with st.spinner("Training neural network model..."):
- try:
- # Split data
- X = df[feature_columns]
- y = df[target_column]
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
-
- # Preprocessing
- numeric_transformer = Pipeline(steps=[
- ('imputer', SimpleImputer(strategy='median')),
- ('scaler', StandardScaler())
- ])
- categorical_transformer = Pipeline(steps=[
- ('imputer', SimpleImputer(strategy='most_frequent')),
- ('onehot', OneHotEncoder(handle_unknown='ignore'))
+ categorical_transformer = Pipeline(steps=[
+ ('imputer', SimpleImputer(strategy='most_frequent')),
+ ('onehot', OneHotEncoder(handle_unknown='ignore'))
+ ])
+
+ preprocessor = ColumnTransformer(
+ transformers=[
+ ('num', numeric_transformer, numeric_features),
+ ('cat', categorical_transformer, categorical_features)
])
- numeric_features = X_train.select_dtypes(include=np.number).columns
- categorical_features = X_train.select_dtypes(include='object').columns
-
- preprocessor = ColumnTransformer(
- transformers=[
- ('num', numeric_transformer, numeric_features),
- ('cat', categorical_transformer, categorical_features)
- ])
-
- X_train_processed = preprocessor.fit_transform(X_train)
- X_test_processed = preprocessor.transform(X_test)
-
- # Neural Network Model Selection and Training
- tf.random.set_seed(42) # for reproducibility
-
- # Callbacks (Early Stopping)
- early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
-
- if model_type == "Simple Neural Network":
- model = keras.Sequential()
- model.add(layers.Input(shape=(X_train_processed.shape[1],)))
- for _ in range(hidden_layers):
- model.add(layers.Dense(neurons_per_layer, activation=activation)) # Use the selected activation
- model.add(
- layers.Dense(1 if problem_type == "Regression" else len(np.unique(y_train)),
- activation='linear' if problem_type == "Regression" else 'softmax'))
-
- optimizer = keras.optimizers.Adam(learning_rate=learning_rate) # Use the learning rate
-
- model.compile(optimizer=optimizer,
- loss='mse' if problem_type == "Regression" else 'sparse_categorical_crossentropy',
- metrics=['mae'] if problem_type == "Regression" else ['accuracy'])
-
- history = model.fit(X_train_processed, y_train, epochs=epochs, batch_size=batch_size,
- validation_split=0.2, verbose=0,
- callbacks=[early_stopping]) # Added early stopping
-
- y_pred = model.predict(X_test_processed)
- if problem_type == "Classification":
- y_pred = np.argmax(y_pred, axis=1)
-
- elif model_type == "Convolutional Neural Network (CNN)":
- X_train_cnn = np.expand_dims(X_train_processed, axis=2)
- X_test_cnn = np.expand_dims(X_test_processed, axis=2)
-
- model = keras.Sequential()
- model.add(layers.Conv1D(filters=filters, kernel_size=kernel_size, activation='relu',
- input_shape=(X_train_cnn.shape[1], 1)))
- model.add(layers.MaxPooling1D(pool_size=pooling_size))
- model.add(layers.Flatten())
- model.add(layers.Dense(50, activation='relu'))
- model.add(
- layers.Dense(1 if problem_type == "Regression" else len(np.unique(y_train)),
- activation='linear' if problem_type == "Regression" else 'softmax'))
-
- optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
- model.compile(optimizer=optimizer,
- loss='mse' if problem_type == "Regression" else 'sparse_categorical_crossentropy',
- metrics=['mae'] if problem_type == "Regression" else ['accuracy'])
-
- history = model.fit(X_train_cnn, y_train, epochs=epochs_cnn, batch_size=batch_size_cnn,
- validation_split=0.2, verbose=0,
- callbacks=[early_stopping])
-
- y_pred = model.predict(X_test_cnn)
- if problem_type == "Classification":
- y_pred = np.argmax(y_pred, axis=1)
-
- elif model_type == "Recurrent Neural Network (RNN)":
- try:
- X_train_rnn = np.reshape(X_train_processed, (
- X_train_processed.shape[0], sequence_length,
- X_train_processed.shape[1] // sequence_length))
- X_test_rnn = np.reshape(X_test_processed, (
- X_test_processed.shape[0], sequence_length, X_test_processed.shape[1] // sequence_length))
-
- model = keras.Sequential()
- model.add(layers.SimpleRNN(units, activation='relu', # Use the selected units
- dropout=dropout_rate,
- input_shape=(X_train_rnn.shape[1], X_train_rnn.shape[2])))
- model.add(
- layers.Dense(1 if problem_type == "Regression" else len(np.unique(y_train)),
- activation='linear' if problem_type == "Regression" else 'softmax'))
-
- optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
- model.compile(optimizer=optimizer,
- loss='mse' if problem_type == "Regression" else 'sparse_categorical_crossentropy',
- metrics=['mae'] if problem_type == "Regression" else ['accuracy'])
-
- history = model.fit(X_train_rnn, y_train, epochs=epochs_rnn, batch_size=batch_size_rnn,
- validation_split=0.2, verbose=0,
- callbacks=[early_stopping])
-
- y_pred = model.predict(X_test_rnn)
- if problem_type == "Classification":
- y_pred = np.argmax(y_pred, axis=1)
- except Exception as e:
- st.error(f"Error during RNN training: {e}")
- st.stop() # Stop execution if RNN fails
-
- # Evaluation
- if problem_type == "Regression":
- mse = mean_squared_error(y_test, y_pred)
- rmse = np.sqrt(mse)
- mae = mean_absolute_error(y_test, y_pred)
- r2 = r2_score(y_test, y_pred)
- st.write(f"Mean Squared Error: {mse:.4f}")
- st.write(f"Root Mean Squared Error: {rmse:.4f}")
- st.write(f"Mean Absolute Error: {mae:.4f}")
- st.write(f"R-squared: {r2:.4f}")
- else:
- accuracy = accuracy_score(y_test, y_pred)
- precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
- recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
- f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
- st.write(f"Accuracy: {accuracy:.4f}")
- st.write(f"Precision: {precision:.4f}")
- st.write(f"Recall: {recall:.4f}")
- st.write(f"F1 Score: {f1:.4f}")
- st.write("Classification Report:")
- st.text(classification_report(y_test, y_pred))
-
- # Visualization
- st.subheader("Training History")
- fig, ax = plt.subplots() # Use matplotlib directly
-
- ax.plot(history.history['loss'], label='loss')
- ax.plot(history.history['val_loss'], label='val_loss')
- ax.set_xlabel('Epoch')
- ax.set_ylabel('Loss')
- ax.legend()
- st.pyplot(fig) # Display with st.pyplot
-
- st.success("Model trained successfully!")
+ X_train_processed = preprocessor.fit_transform(X_train)
+ X_test_processed = preprocessor.transform(X_test)
+
+ # Neural Network Model
+ model = keras.Sequential()
+ model.add(layers.Input(shape=(X_train_processed.shape[1],)))
+ for _ in range(hidden_layers):
+ model.add(layers.Dense(neurons_per_layer, activation=activation))
+ model.add(layers.Dense(1 if problem_type == "Regression" else len(np.unique(y_train)),
+ activation='linear' if problem_type == "Regression" else 'softmax'))
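+ # sparse_categorical_crossentropy (used below for classification) expects integer-encoded class labels;
+ # string targets may need LabelEncoder first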
+
+ # Compile the model
+ optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
+ model.compile(optimizer=optimizer,
+ loss='mse' if problem_type == "Regression" else 'sparse_categorical_crossentropy',
+ metrics=['mae'] if problem_type == "Regression" else ['accuracy'])
+
+ # Train the model
+ history = model.fit(X_train_processed, y_train, epochs=epochs, batch_size=batch_size,
+ validation_split=0.2, verbose=0)
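+ # Note: the earlier implementation passed an EarlyStopping callback here; without one, training always runs for the full epoch count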
+
+ # Store model and preprocessor
+ st.session_state.model = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
+ st.session_state.preprocessor = preprocessor
+
+ # Store the test data
+ st.session_state.X_train_selected = X_train_processed
+ st.session_state.X_test_selected = X_test_processed
+ st.session_state.y_train = y_train
+ st.session_state.y_test = y_test
+
+ # Model Evaluation
+ y_pred = model.predict(X_test_processed)
+ if problem_type == "Classification":
+ y_pred = np.argmax(y_pred, axis=1)  # Keras returns class probabilities; convert to class labels as the previous implementation did
+ if problem_type == "Regression":
+ mse = mean_squared_error(y_test, y_pred)
+ rmse = np.sqrt(mse)
+ mae = mean_absolute_error(y_test, y_pred)
+ r2 = r2_score(y_test, y_pred)
+ st.write(f"Mean Squared Error: {mse:.4f}")
+ st.write(f"Root Mean Squared Error: {rmse:.4f}")
+ st.write(f"Mean Absolute Error: {mae:.4f}")
+ st.write(f"R-squared: {r2:.4f}")
+ else:
+ from sklearn.metrics import classification_report  # local import; not in the top-level metrics import
+ accuracy = accuracy_score(y_test, y_pred)
+ precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
+ recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
+ f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
+ st.write(f"Accuracy: {accuracy:.4f}")
+ st.write(f"Precision: {precision:.4f}")
+ st.write(f"Recall: {recall:.4f}")
+ st.write(f"F1 Score: {f1:.4f}")
+ st.write("Classification Report:")
+ st.text(classification_report(y_test, y_pred))
+
+ # Visualization
+ st.subheader("๐ Training History")
+ fig, ax = plt.subplots()
+ ax.plot(history.history['loss'], label='loss')
+ ax.plot(history.history['val_loss'], label='val_loss')
+ ax.set_xlabel('Epoch')
+ ax.set_ylabel('Loss')
+ ax.legend()
+ st.pyplot(fig)
+
+ st.success("Neural network trained successfully!")
+ except Exception as e:
+ st.error(f"An error occurred during training: {e}")
- except Exception as e:
- st.error(f"An error occurred during training: {e}")
\ No newline at end of file
+ # Model Saving
+ if st.session_state.model is not None:
+ st.subheader("๐พ Save Model")
+ model_filename = st.text_input("Enter Model Filename (without extension)", "neural_network")
+ if st.button("Save Model"):
+ try:
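+ # Note: joblib may not reliably serialize a Pipeline that wraps a Keras model; model.save() is an alternative for the network itself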
+ joblib.dump(st.session_state.model, f"{model_filename}.joblib")
+ st.success(f"Model saved as {model_filename}.joblib")
+ except Exception as e:
+ st.error(f"Error saving model: {e}")
\ No newline at end of file