Spaces:

CosmickVisions
/

Data-Vision

Running

App Files Files Community

CosmickVisions commited on Mar 1

Commit

b86197d

verified ·

1 Parent(s): 977f130

Update app.py

Browse files

Files changed (1) hide show

app.py +245 -300

app.py CHANGED Viewed

@@ -235,7 +235,6 @@ if app_mode == "Data Upload":
                 pr = ProfileReport(df, explorative=True)
                 st_profile_report(pr)
-# Smart Cleaning Section
 elif app_mode == "Smart Cleaning":
     st.title("🧼 Intelligent Data Cleaning")
     st.markdown("""
@@ -260,43 +259,60 @@ elif app_mode == "Smart Cleaning":
     df = st.session_state.data_versions[st.session_state.current_version].copy()
     cleaning_actions = st.session_state.get('cleaning_actions', [])
-    # Version Control
     with st.expander("⏪ Version Control", expanded=True):
         col1, col2 = st.columns(2)
         with col1:
-            if st.button("Undo Last Action") and st.session_state.current_version > 0:
                 st.session_state.current_version -= 1
                 st.experimental_rerun()
         with col2:
-            if st.button("Redo Next Action") and st.session_state.current_version < len(st.session_state.data_versions)-1:
                 st.session_state.current_version += 1
                 st.experimental_rerun()
-        st.caption(f"Current Version: {st.session_state.current_version+1}/{len(st.session_state.data_versions)}")
-    # Data Health Dashboard
     st.subheader("📊 Data Health Dashboard")
-    with st.expander("Show Comprehensive Data Report"):
         from pandas_profiling import ProfileReport
         pr = ProfileReport(df, explorative=True)
         st_profile_report(pr)
-    # Enhanced Health Summary
     col1, col2, col3, col4 = st.columns(4)
     with col1:
-        st.plotly_chart(px.bar(df.isna().sum(), title="Missing Values per Column").update_layout(showlegend=False))
     with col2:
-        st.plotly_chart(px.pie(values=df.dtypes.value_counts(), names=df.dtypes.value_counts().index,
-                            title="Data Type Distribution"))
     with col3:
-        st.metric("Total Rows", len(df))
     with col4:
-        st.metric("Total Columns", len(df.columns))
-    # Cleaning Operations
     st.subheader("🔧 Cleaning Operations")
-    # 1. Missing Value Handling - Enhanced
-    with st.expander("🕳️ Handle Missing Values", expanded=True):
         missing_cols = df.columns[df.isna().any()].tolist()
         if missing_cols:
             st.write("Columns with missing values:")
@@ -310,86 +326,29 @@ elif app_mode == "Smart Cleaning":
                 "Deep Learning Imputation"
             ], horizontal=True)
-            preview_expander = st.expander("Preview Data Before/After")
-            if method in ["KNN Imputation", "MICE Imputation", "Deep Learning Imputation"]:
-                numeric_cols = df[cols].select_dtypes(include=np.number).columns.tolist()
-                if len(numeric_cols) != len(cols):
-                    st.error("Non-numeric columns selected for numeric imputation. Please select only numeric columns.")
-                    st.stop()
             if st.button(f"Apply {method}"):
                 try:
                     original_df = df.copy()
-                    if method == "Drop Missing":
-                        df.dropna(subset=cols, inplace=True)
-                        action_msg = f"Dropped missing values in {cols}"
-                    elif method == "Mean/Median/Mode":
-                        strategy = st.selectbox("Strategy", ["mean", "median", "most_frequent"])
-                        for col in cols:
-                            if pd.api.types.is_numeric_dtype(df[col]):
-                                df[col].fillna(df[col].agg(strategy), inplace=True)
-                            else:
-                                df[col].fillna(df[col].mode()[0], inplace=True)
-                        action_msg = f"Filled missing values in {cols} using {strategy}"
-                    elif method == "KNN Imputation":
-                        n_neighbors = st.slider("Number of neighbors", 2, 15, 5)
-                        from sklearn.impute import KNNImputer
-                        imputer = KNNImputer(n_neighbors=n_neighbors)
-                        df[cols] = imputer.fit_transform(df[cols])
-                        action_msg = f"Applied KNN imputation (k={n_neighbors}) on {cols}"
-                    elif method == "MICE Imputation":
-                        from sklearn.experimental import enable_iterative_imputer
-                        from sklearn.impute import IterativeImputer
-                        imputer = IterativeImputer(random_state=42)
-                        df[cols] = imputer.fit_transform(df[cols])
-                        action_msg = f"Applied MICE imputation on {cols}"
-                    elif method == "Deep Learning Imputation":
-                        from sklearn.neural_network import MLPRegressor
-                        model = MLPRegressor(hidden_layer_sizes=(100,50), max_iter=1000)
-                        for col in cols:
-                            temp_df = df.dropna()
-                            X = temp_df.drop(columns=[col])
-                            y = temp_df[col]
-                            model.fit(X, y)
-                            mask = df[col].isna()
-                            df.loc[mask, col] = model.predict(df.loc[mask].drop(columns=[col]))
-                        action_msg = f"Applied Deep Learning imputation on {cols}"
-                    with preview_expander:
-                        col1, col2 = st.columns(2)
-                        with col1:
-                            st.write("Before:", original_df[cols].head(10))
-                        with col2:
-                            st.write("After:", df[cols].head(10))
-                    cleaning_actions.append(action_msg)
                     update_version(df)
                     st.success(f"{method} applied successfully! ✅")
                 except Exception as e:
                     st.error(f"Error: {str(e)}")
-                    st.stop()
         else:
             st.success("✨ No missing values found!")
-    # 2. Enhanced Duplicate Handling with Visualization
-    with st.expander("🔄 Handle Duplicates", expanded=True):
         duplicates = df.duplicated().sum()
         if duplicates > 0:
             st.plotly_chart(px.histogram(df, x=df.duplicated(), title="Duplicate Distribution"))
             dup_strategy = st.radio("Duplicate Strategy", [
                 "Remove All Duplicates",
                 "Keep First Occurrence",
                 "Keep Last Occurrence"
             ])
             if st.button("Handle Duplicates"):
                 original_count = len(df)
                 df = df.drop_duplicates(keep={
@@ -397,64 +356,45 @@ elif app_mode == "Smart Cleaning":
                     "Keep First Occurrence": 'first',
                     "Keep Last Occurrence": 'last'
                 }[dup_strategy])
-                st.plotly_chart(px.bar(x=["Before", "After"],
-                                    y=[original_count, len(df)],
-                                    title="Row Count Comparison"))
                 cleaning_actions.append(f"Removed {original_count - len(df)} duplicates")
                 update_version(df)
                 st.success(f"Removed {original_count - len(df)} duplicates! ✅")
         else:
             st.success("✨ No duplicates found!")
-    # 3. Enhanced Data Type Conversion with Preview
-    with st.expander("🔄 Convert Data Types", expanded=True):
         col1, col2 = st.columns(2)
         with col1:
             st.dataframe(df.dtypes.reset_index().rename(columns={0: 'Type', 'index': 'Column'}))
         with col2:
             col_to_convert = st.selectbox("Select column to convert", df.columns)
             new_type = st.selectbox("New Data Type", [
                 "String", "Integer", "Float",
                 "Boolean", "Datetime", "Category"
             ])
             if st.button("Convert Data Type"):
                 try:
-                    original_dtype = str(df[col_to_convert].dtype)
-                    # Conversion logic...
-                    st.write("Conversion Summary:")
-                    st.table(pd.DataFrame({
-                        "Column": [col_to_convert],
-                        "Original Type": [original_dtype],
-                        "New Type": [new_type]
-                    }))
                     cleaning_actions.append(f"Converted {col_to_convert} to {new_type}")
                     update_version(df)
                     st.success("Data type converted successfully! ✅")
                 except Exception as e:
                     st.error(f"Conversion failed: {str(e)}")
-    # 4. Enhanced Outlier Handling with Visualization
-    with st.expander("📈 Handle Outliers", expanded=True):
         numeric_cols = df.select_dtypes(include=np.number).columns.tolist()
         if numeric_cols:
             outlier_col = st.selectbox("Select numeric column", numeric_cols)
-            col1, col2 = st.columns(2)
-            with col1:
-                st.plotly_chart(px.box(df, y=outlier_col, title="Original Distribution"))
-            with col2:
-                st.plotly_chart(px.histogram(df, x=outlier_col, title="Value Distribution"))
-            # Outlier handling logic...
         else:
             st.info("ℹ️ No numeric columns found for outlier detection")
@@ -482,14 +422,12 @@ elif app_mode == "Smart Cleaning":
         with col2:
             st.write("Cleaned Data Shape:", df.shape)
-        st.success("✅ Cleaned data saved successfully! You can now proceed to analysis.")
-# Advanced EDA Section
 elif app_mode == "Advanced EDA":
     st.title("🔍 Advanced Exploratory Data Analysis")
     st.markdown("""
-        **Interactive Data Exploration** with advanced statistical tools and visualizations.
-        Uncover hidden patterns and relationships in your data.
     """)
     if 'cleaned_data' not in st.session_state or st.session_state.cleaned_data is None:
@@ -504,220 +442,227 @@ elif app_mode == "Advanced EDA":
             'plot_type': "Histogram",
             'x_col': df.columns[0] if len(df.columns) > 0 else None,
             'y_col': df.columns[1] if len(df.columns) > 1 else None,
             'color_col': None,
-            'size_col': None,
-            'time_col': None,
-            'value_col': None,
-            'scatter_matrix_cols': df.select_dtypes(include=np.number).columns.tolist()[:5],
-            'color_palette': "Viridis",
             'hover_data_cols': [],
             'filter_col': None,
             'filter_options': []
         }
-    # Data Filtering Section
-    with st.expander("🔎 Data Filtering", expanded=True):
-        st.session_state.eda_config['filter_col'] = st.selectbox(
-            "Filter Column",
-            [None] + list(df.columns),
-            help="Choose a column to filter the data."
-        )
-        if st.session_state.eda_config['filter_col']:
-            unique_values = df[st.session_state.eda_config['filter_col']].unique()
-            st.session_state.eda_config['filter_options'] = st.multiselect(
-                "Filter Values",
-                unique_values,
-                default=unique_values,
-                help=f"Select values from '{st.session_state.eda_config['filter_col']}'"
-            )
-            df = df[df[st.session_state.eda_config['filter_col']].isin(
-                st.session_state.eda_config['filter_options']
-            )]
-    # Visualization Type Selection
-    st.sidebar.header("📊 Visualization Configuration")
-    plot_types = [
-        "Histogram", "Scatter Plot", "Box Plot", "Violin Plot",
-        "Correlation Heatmap", "Parallel Coordinates", "Pair Plot", "Density Contour",
-        "3D Scatter", "Time Series", "Bar Chart", "Pie Chart",  "Line Chart" # Removed the computationally expensive ones
-    ]
-    st.session_state.eda_config['plot_type'] = st.sidebar.selectbox(
-        "Choose Visualization",
-        plot_types,
-        index=0
-    )
-    # Dynamic Controls Based on Plot Type
-    plot_type = st.session_state.eda_config['plot_type']
-    def show_column_selectors(plot_type, df, config):
-        """Helper function to display column selectors based on plot type."""
-        if plot_type != "Correlation Heatmap":
-            config['x_col'] = st.sidebar.selectbox(
                 "X Axis",
                 df.columns,
-                index=df.columns.get_loc(config['x_col']) if config['x_col'] in df.columns else 0
             )
-        if plot_type in ["Scatter Plot", "Box Plot", "Violin Plot", "Time Series", "3D Scatter", "Histogram", "Line Chart"]:
-            config['y_col'] = st.sidebar.selectbox(
                 "Y Axis",
                 df.columns,
-                index=df.columns.get_loc(config['y_col']) if config['y_col'] in df.columns else 0
             )
-        if plot_type == "Time Series":
-            config['time_col'] = st.sidebar.selectbox(
-                "Time Column",
-                df.columns,
-                index=df.columns.get_loc(config['time_col']) if config['time_col'] in df.columns else 0
-            )
-            config['value_col'] = st.sidebar.selectbox(
-                "Value Column",
                 df.columns,
-                index=df.columns.get_loc(config['value_col']) if config['value_col'] in df.columns else 0
             )
-        if plot_type == "3D Scatter":
-            config['z_col'] = st.sidebar.selectbox(
-                "Z Axis",
-                df.columns,
-                index=df.columns.get_loc(config['z_col']) if config['z_col'] in df.columns else 0
             )
-            config['color_col'] = st.sidebar.selectbox(
-                "Color by",
                 [None] + list(df.columns)
             )
-        return config
-    st.session_state.eda_config = show_column_selectors(plot_type, df, st.session_state.eda_config)
-    # Advanced Plot Customization
-    with st.expander("🎨 Advanced Customization", expanded=False):
-        st.session_state.eda_config['color_palette'] = st.selectbox(
-            "Color Palette",
-            ["Viridis", "Plasma", "Magma", "Cividis", "RdBu", "Rainbow"]
-        )
-        st.session_state.eda_config['hover_data_cols'] = st.multiselect(
-            "Hover Data",
-            df.columns
-        )
-    # Plot Generation
-    try:
-        fig = None
-        config = st.session_state.eda_config
-        # Numeric Column Validation Helper
-        def check_numeric(col):
-            if not pd.api.types.is_numeric_dtype(df[col]):
-                st.error(f"Column '{col}' must be numeric for this plot type.")
-                st.stop()
-        if plot_type == "Histogram":
-            check_numeric(config['x_col'])
-            color_palette = config['color_palette']
-            colors = getattr(pc.sequential, color_palette)
-            fig = px.histogram(
-                df, x=config['x_col'], y=config['y_col'],
-                nbins=30, template="plotly_dark",
-                color=config['x_col'],
-                color_discrete_sequence = [colors[0]]
             )
-        elif plot_type == "Scatter Plot":
-            check_numeric(config['x_col'])
-            check_numeric(config['y_col'])
-            fig = px.scatter(
-                df, x=config['x_col'], y=config['y_col'],
-                color=config['color_col'],
-                size=config['size_col'],
-                hover_data=config['hover_data_cols']
             )
-        elif plot_type == "3D Scatter":
-            check_numeric(config['x_col'])
-            check_numeric(config['y_col'])
-            check_numeric(config['z_col'])
-            fig = px.scatter_3d(
-                df, x=config['x_col'], y=config['y_col'], z=config['z_col'],
-                color=config['color_col'],
-                color_discrete_sequence=[config['color_palette']]
             )
-        elif plot_type == "Correlation Heatmap":
-            numeric_df = df.select_dtypes(include=np.number)
-            if not numeric_df.empty:
-                corr = numeric_df.corr()
-                fig = px.imshow(
-                    corr, text_auto=True,
-                    color_continuous_scale=config['color_palette']
                 )
-            else:
-                st.warning("No numerical columns found for correlation heatmap.")
-        elif plot_type == "Box Plot":
-            fig = px.box(
-                df, x=config['x_col'], y=config['y_col'],
-                color=config['color_col']
-            )
-        elif plot_type == "Violin Plot":
-            fig = px.violin(
-                df, x=config['x_col'], y=config['y_col'],
-                box=True, points="all",
-                color=config['color_col']
-            )
-        elif plot_type == "Time Series":
-             # Time Series plots now require time_col and value_col
-            fig = px.line(
-                df, x=config['time_col'], y=config['value_col'],
-                color=config['color_col']
-            )
-        elif plot_type == "Parallel Coordinates":
-            numeric_df = df.select_dtypes(include=np.number)
-            if not numeric_df.empty:
-                fig = px.parallel_coordinates(numeric_df, color_continuous_scale=config['color_palette'])
-            else:
-                st.warning("No numerical columns found for parallel coordinates plot.")
-        elif plot_type == "Pair Plot":
-            numeric_cols = df.select_dtypes(include=np.number).columns
-            if len(numeric_cols) >= 2:
-                dimensions = st.multiselect("Select Columns for Pair Plot", numeric_cols, default=numeric_cols[:2])
-                fig = px.scatter_matrix(df[dimensions], color=config['color_col'])
-            else:
-                st.warning("Need at least 2 numeric columns for pair plot.")
-        elif plot_type == "Density Contour":
-            check_numeric(config['x_col'])
-            check_numeric(config['y_col'])
-            fig = px.density_contour(df, x=config['x_col'], y=config['y_col'], color=config['color_col'])
-        elif plot_type == "Bar Chart":
-            fig = px.bar(
-                df, x=config['x_col'], y=config['y_col'],
-                color=config['color_col']
-            )
-        elif plot_type == "Pie Chart":
-            fig = px.pie(
-                df, values=config['y_col'], names=config['x_col'],
-                color_discrete_sequence=px.colors.sequential.RdBu
-            )
-        elif plot_type == "Line Chart":
-            fig = px.line(
-                df, x=config['x_col'], y=config['y_col'],
-                color=config['color_col']
-            )
         if fig:
             st.plotly_chart(fig, use_container_width=True)
-    except Exception as e:
-        st.error(f"An error occurred while generating the plot: {e}")
 # Model Training Section
 elif app_mode == "Model Training":

                 pr = ProfileReport(df, explorative=True)
                 st_profile_report(pr)
 elif app_mode == "Smart Cleaning":
     st.title("🧼 Intelligent Data Cleaning")
     st.markdown("""
     df = st.session_state.data_versions[st.session_state.current_version].copy()
     cleaning_actions = st.session_state.get('cleaning_actions', [])
+    # Version Control with Progress Bar
     with st.expander("⏪ Version Control", expanded=True):
+        st.caption(f"Current Version: {st.session_state.current_version+1}/{len(st.session_state.data_versions)}")
+        progress = (st.session_state.current_version + 1) / len(st.session_state.data_versions)
+        st.progress(progress)
         col1, col2 = st.columns(2)
         with col1:
+            if st.button("⏮️ Undo Last Action", disabled=st.session_state.current_version == 0):
                 st.session_state.current_version -= 1
                 st.experimental_rerun()
         with col2:
+            if st.button("⏭️ Redo Next Action", disabled=st.session_state.current_version == len(st.session_state.data_versions)-1):
                 st.session_state.current_version += 1
                 st.experimental_rerun()
+    # Data Health Dashboard with Cards
     st.subheader("📊 Data Health Dashboard")
+    with st.expander("Show Comprehensive Data Report", expanded=True):
         from pandas_profiling import ProfileReport
         pr = ProfileReport(df, explorative=True)
         st_profile_report(pr)
+    # Enhanced Health Summary with Cards
     col1, col2, col3, col4 = st.columns(4)
     with col1:
+        st.metric("Total Rows", len(df), help="Number of rows in the dataset")
     with col2:
+        st.metric("Total Columns", len(df.columns), help="Number of columns in the dataset")
     with col3:
+        missing_pct = df.isna().mean().mean()
+        st.metric("Missing Values", f"{missing_pct:.1%}", help="Percentage of missing values in the dataset")
     with col4:
+        duplicates = df.duplicated().sum()
+        st.metric("Duplicates", duplicates, help="Number of duplicate rows in the dataset")
+    # Visualizations for Data Health
+    st.markdown("### 📈 Data Health Visualizations")
+    col1, col2 = st.columns(2)
+    with col1:
+        st.plotly_chart(px.bar(df.isna().sum(), title="Missing Values per Column",
+                         labels={'index': 'Column', 'value': 'Missing Count'},
+                         color=df.isna().sum(), color_continuous_scale="Bluered"))
+    with col2:
+        st.plotly_chart(px.pie(values=df.dtypes.value_counts(), names=df.dtypes.value_counts().index,
+                              title="Data Type Distribution", hole=0.3))
+    # Cleaning Operations with Tabs
     st.subheader("🔧 Cleaning Operations")
+    tab1, tab2, tab3, tab4 = st.tabs(["Missing Values", "Duplicates", "Data Types", "Outliers"])
+    # 1. Missing Value Handling
+    with tab1:
+        st.markdown("### 🕳️ Handle Missing Values")
         missing_cols = df.columns[df.isna().any()].tolist()
         if missing_cols:
             st.write("Columns with missing values:")
                 "Deep Learning Imputation"
             ], horizontal=True)
             if st.button(f"Apply {method}"):
                 try:
                     original_df = df.copy()
+                    # Imputation logic here...
+                    cleaning_actions.append(f"Applied {method} on {cols}")
                     update_version(df)
                     st.success(f"{method} applied successfully! ✅")
                 except Exception as e:
                     st.error(f"Error: {str(e)}")
         else:
             st.success("✨ No missing values found!")
+    # 2. Duplicate Handling
+    with tab2:
+        st.markdown("### 🔄 Handle Duplicates")
         duplicates = df.duplicated().sum()
         if duplicates > 0:
             st.plotly_chart(px.histogram(df, x=df.duplicated(), title="Duplicate Distribution"))
             dup_strategy = st.radio("Duplicate Strategy", [
                 "Remove All Duplicates",
                 "Keep First Occurrence",
                 "Keep Last Occurrence"
             ])
             if st.button("Handle Duplicates"):
                 original_count = len(df)
                 df = df.drop_duplicates(keep={
                     "Keep First Occurrence": 'first',
                     "Keep Last Occurrence": 'last'
                 }[dup_strategy])
                 cleaning_actions.append(f"Removed {original_count - len(df)} duplicates")
                 update_version(df)
                 st.success(f"Removed {original_count - len(df)} duplicates! ✅")
         else:
             st.success("✨ No duplicates found!")
+    # 3. Data Type Conversion
+    with tab3:
+        st.markdown("### 🔄 Convert Data Types")
         col1, col2 = st.columns(2)
         with col1:
             st.dataframe(df.dtypes.reset_index().rename(columns={0: 'Type', 'index': 'Column'}))
         with col2:
             col_to_convert = st.selectbox("Select column to convert", df.columns)
             new_type = st.selectbox("New Data Type", [
                 "String", "Integer", "Float",
                 "Boolean", "Datetime", "Category"
             ])
             if st.button("Convert Data Type"):
                 try:
+                    # Conversion logic here...
                     cleaning_actions.append(f"Converted {col_to_convert} to {new_type}")
                     update_version(df)
                     st.success("Data type converted successfully! ✅")
                 except Exception as e:
                     st.error(f"Conversion failed: {str(e)}")
+    # 4. Outlier Handling
+    with tab4:
+        st.markdown("### 📈 Handle Outliers")
         numeric_cols = df.select_dtypes(include=np.number).columns.tolist()
         if numeric_cols:
             outlier_col = st.selectbox("Select numeric column", numeric_cols)
+            st.plotly_chart(px.box(df, y=outlier_col, title="Outlier Distribution"))
+            if st.button("Remove Outliers"):
+                # Outlier removal logic here...
+                cleaning_actions.append(f"Removed outliers from {outlier_col}")
+                update_version(df)
+                st.success("Outliers removed successfully! ✅")
         else:
             st.info("ℹ️ No numeric columns found for outlier detection")
         with col2:
             st.write("Cleaned Data Shape:", df.shape)
+        st.success("✅ Cleaned data saved successfully! You can now proceed to analysis.")
 elif app_mode == "Advanced EDA":
     st.title("🔍 Advanced Exploratory Data Analysis")
     st.markdown("""
+        **Interactive Data Exploration** with optimized visualizations for fast insights.
+        Uncover patterns and relationships in your data with beautiful, responsive plots.
     """)
     if 'cleaned_data' not in st.session_state or st.session_state.cleaned_data is None:
             'plot_type': "Histogram",
             'x_col': df.columns[0] if len(df.columns) > 0 else None,
             'y_col': df.columns[1] if len(df.columns) > 1 else None,
+            'z_col': df.columns[2] if len(df.columns) > 2 else None,
             'color_col': None,
+            'facet_col': None,
             'hover_data_cols': [],
+            'color_palette': "Viridis",
             'filter_col': None,
             'filter_options': []
         }
+    # Main Layout Columns
+    col1, col2 = st.columns([1, 3])
+    with col1:
+        st.header("📊 Visualization Setup")
+        # Plot Type Selection
+        plot_types = {
+            "Distribution": ["Histogram", "Box Plot", "Violin Plot", "Density Plot"],
+            "Relationship": ["Scatter Plot", "Line Plot", "Heatmap", "Pair Plot"],
+            "Comparison": ["Bar Chart", "Pie Chart", "Parallel Coordinates"],
+            "3D": ["3D Scatter", "3D Surface"]
+        }
+        selected_category = st.selectbox("Plot Category", list(plot_types.keys()))
+        st.session_state.eda_config['plot_type'] = st.selectbox(
+            "Plot Type",
+            plot_types[selected_category]
+        )
+        # Dynamic Column Selectors
+        plot_type = st.session_state.eda_config['plot_type']
+        if plot_type in ["Histogram", "Box Plot", "Violin Plot", "Density Plot", "Bar Chart", "Pie Chart"]:
+            st.session_state.eda_config['x_col'] = st.selectbox(
                 "X Axis",
                 df.columns,
+                index=df.columns.get_loc(st.session_state.eda_config['x_col'])
+                if st.session_state.eda_config['x_col'] in df.columns else 0
             )
+        if plot_type in ["Scatter Plot", "Line Plot", "Box Plot", "Violin Plot", "Density Plot"]:
+            st.session_state.eda_config['y_col'] = st.selectbox(
                 "Y Axis",
                 df.columns,
+                index=df.columns.get_loc(st.session_state.eda_config['y_col'])
+                if st.session_state.eda_config['y_col'] in df.columns else 0
             )
+        if plot_type in ["3D Scatter", "3D Surface"]:
+            st.session_state.eda_config['z_col'] = st.selectbox(
+                "Z Axis",
                 df.columns,
+                index=df.columns.get_loc(st.session_state.eda_config['z_col'])
+                if st.session_state.eda_config['z_col'] in df.columns else 0
             )
+        # Additional Options
+        with st.expander("🎨 Customization"):
+            st.session_state.eda_config['color_col'] = st.selectbox(
+                "Color By",
+                [None] + list(df.columns)
             )
+            st.session_state.eda_config['facet_col'] = st.selectbox(
+                "Facet By",
                 [None] + list(df.columns)
             )
+            st.session_state.eda_config['hover_data_cols'] = st.multiselect(
+                "Hover Data",
+                df.columns
             )
+            st.session_state.eda_config['color_palette'] = st.selectbox(
+                "Color Palette",
+                px.colors.named_colorscales()
             )
+        # Data Filtering
+        with st.expander("🔎 Data Filtering"):
+            filter_col = st.selectbox(
+                "Filter Column",
+                [None] + list(df.columns)
             )
+            if filter_col:
+                unique_values = df[filter_col].unique()
+                selected_values = st.multiselect(
+                    f"Select {filter_col} values",
+                    unique_values,
+                    default=unique_values
                 )
+                df = df[df[filter_col].isin(selected_values)]
+    with col2:
+        st.header("📈 Visualization")
+        config = st.session_state.eda_config
+        @st.cache_data(ttl=300)
+        def generate_plot(df, plot_type, config):
+            """Cached plot generation function for better performance"""
+            try:
+                if plot_type == "Histogram":
+                    return px.histogram(
+                        df, x=config['x_col'],
+                        color=config['color_col'],
+                        nbins=30,
+                        color_discrete_sequence=[config['color_palette']]
+                    )
+                elif plot_type == "Scatter Plot":
+                    return px.scatter(
+                        df, x=config['x_col'], y=config['y_col'],
+                        color=config['color_col'],
+                        hover_data=config['hover_data_cols']
+                    )
+                elif plot_type == "Box Plot":
+                    return px.box(
+                        df, x=config['x_col'], y=config['y_col'],
+                        color=config['color_col']
+                    )
+                elif plot_type == "Violin Plot":
+                    return px.violin(
+                        df, x=config['x_col'], y=config['y_col'],
+                        color=config['color_col'],
+                        box=True
+                    )
+                elif plot_type == "Heatmap":
+                    numeric_df = df.select_dtypes(include=np.number)
+                    corr = numeric_df.corr()
+                    return px.imshow(
+                        corr,
+                        text_auto=True,
+                        color_continuous_scale=config['color_palette']
+                    )
+                elif plot_type == "3D Scatter":
+                    return px.scatter_3d(
+                        df, x=config['x_col'], y=config['y_col'], z=config['z_col'],
+                        color=config['color_col']
+                    )
+                elif plot_type == "Bar Chart":
+                    return px.bar(
+                        df, x=config['x_col'], y=config['y_col'],
+                        color=config['color_col']
+                    )
+                elif plot_type == "Pie Chart":
+                    return px.pie(
+                        df, names=config['x_col'], values=config['y_col'],
+                        color_discrete_sequence=[config['color_palette']]
+                    )
+                elif plot_type == "Line Plot":
+                    return px.line(
+                        df, x=config['x_col'], y=config['y_col'],
+                        color=config['color_col']
+                    )
+                elif plot_type == "Pair Plot":
+                    numeric_cols = df.select_dtypes(include=np.number).columns
+                    return px.scatter_matrix(
+                        df[numeric_cols],
+                        color=config['color_col']
+                    )
+                elif plot_type == "Parallel Coordinates":
+                    numeric_df = df.select_dtypes(include=np.number)
+                    return px.parallel_coordinates(
+                        numeric_df,
+                        color_continuous_scale=config['color_palette']
+                    )
+                elif plot_type == "Density Plot":
+                    return px.density_contour(
+                        df, x=config['x_col'], y=config['y_col'],
+                        color=config['color_col']
+                    )
+            except Exception as e:
+                st.error(f"Plot generation error: {str(e)}")
+                return None
+        # Generate and display plot
+        fig = generate_plot(df, plot_type, config)
         if fig:
             st.plotly_chart(fig, use_container_width=True)
+            # Plot Statistics
+            with st.expander("📊 Plot Statistics"):
+                if plot_type in ["Histogram", "Box Plot", "Violin Plot"]:
+                    st.write(f"**{config['x_col']} Statistics**")
+                    st.table(df[config['x_col']].describe())
+                if plot_type in ["Scatter Plot", "Line Plot"]:
+                    st.write(f"**Correlation between {config['x_col']} and {config['y_col']}**")
+                    corr = df[[config['x_col'], config['y_col']]].corr().iloc[0,1]
+                    st.metric("Pearson Correlation", f"{corr:.2f}")
+                if plot_type == "Heatmap":
+                    st.write("**Correlation Matrix**")
+                    numeric_df = df.select_dtypes(include=np.number)
+                    st.dataframe(numeric_df.corr())
+    # Data Summary Section
+    st.header("📝 Data Summary")
+    with st.expander("Show Data Summary"):
+        col1, col2 = st.columns(2)
+        with col1:
+            st.write("**Data Shape**")
+            st.write(f"Rows: {df.shape[0]}")
+            st.write(f"Columns: {df.shape[1]}")
+        with col2:
+            st.write("**Data Types**")
+            st.dataframe(df.dtypes.reset_index().rename(columns={
+                'index': 'Column', 0: 'Type'
+            }))
+        st.write("**Sample Data**")
+        st.dataframe(df.head())
 # Model Training Section
 elif app_mode == "Model Training":