Spaces:

CosmickVisions
/

Data-Vision

Running

App Files Community

CosmickVisions committed on Mar 2

Commit

98ce78a

verified ·

1 Parent(s): f1cf7f1

Update app.py

Browse files

Files changed (1) hide show

app.py +237 -251

app.py CHANGED Viewed

@@ -27,103 +27,6 @@ st.set_page_config(
 )
-# HTML and CSS for the draggable button
-html_code = """
-<style>
-#floatingButton {
-  position: fixed;
-  bottom: 20px;
-  right: 20px;
-  width: 60px;
-  height: 60px;
-  background-color: #007bff;
-  color: white;
-  border: none;
-  border-radius: 50%;
-  cursor: pointer;
-  font-size: 24px;
-  z-index: 1000;
-}
-#floatingButton:active {
-  background-color: #0056b3;
-}
-.draggable {
-  position: absolute;
-  cursor: move;
-}
-</style>
-<button id="floatingButton" class="draggable">+</button>
-<script>
-dragElement(document.getElementById("floatingButton"));
-function dragElement(elmnt) {
-  var pos1 = 0, pos2 = 0, pos3 = 0, pos4 = 0;
-  elmnt.onmousedown = dragMouseDown;
-  function dragMouseDown(e) {
-    e = e || window.event;
-    e.preventDefault();
-    pos3 = e.clientX;
-    pos4 = e.clientY;
-    document.onmouseup = closeDragElement;
-    document.onmousemove = elementDrag;
-  }
-  function elementDrag(e) {
-    e = e || window.event;
-    e.preventDefault();
-    pos1 = pos3 - e.clientX;
-    pos2 = e.clientY;
-    pos3 = e.clientX;
-    pos4 = e.clientY;
-    elmnt.style.top = (elmnt.offsetTop - pos2) + "px";
-    elmnt.style.left = (elmnt.offsetLeft - pos1) + "px";
-  }
-  function closeDragElement() {
-    document.onmouseup = null;
-    document.onmousemove = null;
-  }
-}
-document.getElementById("floatingButton").onclick = function() {
-  var expander = document.getElementById("dataExpander");
-  if (expander.style.display === "none") {
-    expander.style.display = "block";
-  } else {
-    expander.style.display = "none";
-  }
-  fetch("/?show_data=true", {method: "POST"});
-}
-</script>
-"""
-# JavaScript to handle the toggle functionality
-js_code = """
-<script>
-document.addEventListener('DOMContentLoaded', function() {
-  var expander = document.createElement('div');
-  expander.id = "dataExpander";
-  expander.style.display = "none";
-  document.body.appendChild(expander);
-});
-</script>
-"""
-st.markdown(html_code, unsafe_allow_html=True)
-st.markdown(js_code, unsafe_allow_html=True)
-# Function to show data in an expander
-def show_data():
-    st.session_state.show_data = not st.session_state.show_data  # Toggle the state
-    if st.session_state.show_data:
-        with st.expander("✨ Data Viewport", expanded=True):
-            st.dataframe(df, use_container_width=True)
-# --------------------------
 # --------------------------
 # Custom Styling
 # --------------------------
@@ -152,6 +55,16 @@ if 'model' not in st.session_state:
 # --------------------------
 # Helper Functions
 # --------------------------
 def generate_quality_report(df):
     """Generate comprehensive data quality report"""
     report = {
@@ -484,23 +397,29 @@ if app_mode == "Data Upload":
         except Exception as e:
             st.error(f"Error loading file: {str(e)}")
-elif app_mode == "Data Cleaning":
     st.title("🧹 Smart Data Cleaning")
     if st.session_state.raw_data is None:
         st.warning("Please upload data first")
         st.stop()
-    # Initialize session state for undo functionality
     if 'data_versions' not in st.session_state:
         st.session_state.data_versions = [st.session_state.raw_data.copy()]
-    df = st.session_state.data_versions[-1].copy()
     # --------------------------
     # Data Health Dashboard
     # --------------------------
     with st.expander("📊 Data Health Dashboard", expanded=True):
         col1, col2, col3 = st.columns(3)
         with col1:
@@ -509,200 +428,227 @@ elif app_mode == "Data Cleaning":
             st.metric("Total Rows", len(df))
         with col3:
             st.metric("Missing Values", df.isna().sum().sum())
         # Generate quick profile report
         if st.button("Generate Data Health Report"):
             with st.spinner("Analyzing data..."):
                 profile = ProfileReport(df, minimal=True)
                 st_profile_report(profile)
     # --------------------------
     # Undo Functionality
     # --------------------------
     if len(st.session_state.data_versions) > 1:
         if st.button("⏮️ Undo Last Action"):
-            st.session_state.data_versions.pop()
-            df = st.session_state.data_versions[-1].copy()
-            st.session_state.cleaned_data = df
             st.success("Last action undone!")
     # --------------------------
     # Missing Value Handling
     # --------------------------
     with st.expander("🔍 Missing Values Treatment", expanded=True):
         missing_cols = df.columns[df.isna().any()].tolist()
         if missing_cols:
             cols = st.multiselect("Select columns to handle", missing_cols)
             method = st.selectbox("Imputation Method", [
-                "Drop Missing",
-                "Mean/Median",
                 "Custom Value",
                 "Forward Fill",
                 "Backward Fill"
             ])
             if method == "Custom Value":
                 custom_val = st.text_input("Enter custom value")
-            if st.button("Apply Treatment"):
-                st.session_state.data_versions.append(df.copy())
                 try:
                     if method == "Drop Missing":
-                        df = df.dropna(subset=cols)
                     elif method == "Mean/Median":
                         for col in cols:
-                            if pd.api.types.is_numeric_dtype(df[col]):
-                                df[col] = df[col].fillna(df[col].median())
                             else:
-                                df[col] = df[col].fillna(df[col].mode()[0])
                     elif method == "Custom Value" and custom_val:
                         for col in cols:
-                            df[col] = df[col].fillna(custom_val)
                     elif method == "Forward Fill":
-                        df[cols] = df[cols].ffill()
                     elif method == "Backward Fill":
-                        df[cols] = df[cols].bfill()
-                    st.session_state.cleaned_data = df
-                    st.success("Missing values handled successfully!")
                 except Exception as e:
                     st.error(f"Error: {str(e)}")
         else:
             st.success("✨ No missing values found!")
     # --------------------------
     # Data Type Conversion
     # --------------------------
     with st.expander("🔄 Data Type Conversion"):
         col_to_convert = st.selectbox("Select column", df.columns)
         new_type = st.selectbox("New data type", [
-            "String", "Integer", "Float",
             "Boolean", "Datetime"
         ])
         if new_type == "Datetime":
             date_format = st.text_input("Date format (e.g. %Y-%m-%d)", "%Y-%m-%d")
-        if st.button("Convert"):
-            st.session_state.data_versions.append(df.copy())
             try:
                 if new_type == "String":
-                    df[col_to_convert] = df[col_to_convert].astype(str)
                 elif new_type == "Integer":
-                    if df[col_to_convert].dtype == 'object':
                         st.error("Cannot convert text column to integer!")
                     else:
-                        df[col_to_convert] = pd.to_numeric(df[col_to_convert], errors='coerce').astype('Int64')
                 elif new_type == "Float":
-                    if df[col_to_convert].dtype == 'object':
                         st.error("Cannot convert text column to float!")
                     else:
-                        df[col_to_convert] = pd.to_numeric(df[col_to_convert], errors='coerce')
                 elif new_type == "Boolean":
-                    df[col_to_convert] = df[col_to_convert].astype(bool)
                 elif new_type == "Datetime":
-                    df[col_to_convert] = pd.to_datetime(df[col_to_convert], format=date_format, errors='coerce')
-                st.session_state.cleaned_data = df
-                st.success("Conversion successful!")
             except Exception as e:
                 st.error(f"Error: {str(e)}")
     # --------------------------
     # Drop Columns
     # --------------------------
     with st.expander("🗑️ Drop Columns"):
         columns_to_drop = st.multiselect("Select columns to drop", df.columns)
         if columns_to_drop:
             st.warning(f"Will drop: {', '.join(columns_to_drop)}")
-            if st.button("Confirm Drop"):
-                st.session_state.data_versions.append(df.copy())
-                df = df.drop(columns=columns_to_drop)
-                st.session_state.cleaned_data = df
-                st.success("Selected columns dropped successfully!")
     # --------------------------
     # Label Encoding
     # --------------------------
     with st.expander("🔢 Label Encoding"):
         data_to_encode = st.multiselect("Select categorical columns to encode", df.select_dtypes(include='object').columns)
         if data_to_encode:
-            if st.button("Apply Label Encoding"):
-                st.session_state.data_versions.append(df.copy())
                 label_encoders = {}
                 for col in data_to_encode:
                     le = LabelEncoder()
-                    df[col] = le.fit_transform(df[col].astype(str))
                     label_encoders[col] = le
-                st.session_state.cleaned_data = df
-                st.success("Label encoding applied successfully!")
     # --------------------------
     # StandardScaler
     # --------------------------
     with st.expander("📏 StandardScaler"):
         scale_cols = st.multiselect("Select numeric columns to scale", df.select_dtypes(include=np.number).columns)
         if scale_cols:
-            if st.button("Apply StandardScaler"):
-                st.session_state.data_versions.append(df.copy())
                 try:
                     scaler = StandardScaler()
-                    df[scale_cols] = scaler.fit_transform(df[scale_cols])
-                    st.session_state.cleaned_data = df
-                    st.success("Standard scaling applied successfully!")
                 except Exception as e:
                     st.error(f"Error: {str(e)}")
     # --------------------------
     # Pattern-Based Cleaning
     # --------------------------
     with st.expander("🕵️ Pattern-Based Cleaning"):
         selected_col = st.selectbox("Select text column", df.select_dtypes(include='object').columns)
         pattern = st.text_input("Regex pattern (e.g. \d+ for numbers)")
         replacement = st.text_input("Replacement value")
-        if st.button("Apply Pattern Replacement"):
-            st.session_state.data_versions.append(df.copy())
             try:
-                df[selected_col] = df[selected_col].str.replace(pattern, replacement, regex=True)
-                st.session_state.cleaned_data = df
-                st.success("Pattern replacement applied successfully!")
             except Exception as e:
                 st.error(f"Error: {str(e)}")
     # --------------------------
     # Bulk Operations
     # --------------------------
     with st.expander("🚀 Bulk Actions"):
-        if st.button("Auto-Clean Common Issues"):
-            st.session_state.data_versions.append(df.copy())
-            df = df.dropna(axis=1, how='all')  # Remove empty cols
-            df = df.convert_dtypes()  # Better type inference
-            text_cols = df.select_dtypes(include='object').columns
-            df[text_cols] = df[text_cols].apply(lambda x: x.str.strip())
-            st.session_state.cleaned_data = df
-            st.success("Bulk cleaning completed!")
     # --------------------------
     # Cleaned Data Preview
     # --------------------------
-    if st.session_state.cleaned_data is not None:
         with st.expander("✨ Cleaned Data Preview", expanded=True):
-            st.dataframe(st.session_state.cleaned_data.head(), use_container_width=True)
-elif app_mode == "EDA":
     st.title("🔍 Interactive Data Explorer")
     if st.session_state.cleaned_data is None:
         st.warning("Please clean your data first")
         st.stop()
     df = st.session_state.cleaned_data
     # --------------------------
     # Enhanced Data Overview
     # --------------------------
     with st.expander("📁 Dataset Overview", expanded=True):
         col1, col2, col3, col4 = st.columns(4)
         with col1:
@@ -715,7 +661,7 @@ elif app_mode == "EDA":
         with col4:
             dupes = df.duplicated().sum()
             st.metric("Duplicates", dupes, help="Fully duplicated rows")
         # Data Preview Tabs
         tab1, tab2, tab3 = st.tabs(["Quick Preview", "Column Types", "Missing Matrix"])
         with tab1:
@@ -727,16 +673,17 @@ elif app_mode == "EDA":
         with tab3:
             fig = px.imshow(df.isna(), color_continuous_scale='gray')
             st.plotly_chart(fig, use_container_width=True)
     # --------------------------
     # Smart Visualization Builder
     # --------------------------
     st.subheader("📊 Visualization Builder")
     # Automatic plot type suggestions
     numeric_cols = df.select_dtypes(include=np.number).columns.tolist()
     categorical_cols = df.select_dtypes(exclude=np.number).columns.tolist()
     col1, col2 = st.columns([1, 3])
     with col1:
         # Dynamic plot type filtering
@@ -752,81 +699,110 @@ elif app_mode == "EDA":
             index=0,
             help="Automatically filtered based on data types"
         )
-        # Dynamic axis selection
-        x_axis = st.selectbox("X-axis", df.columns,
-                            help="Primary dimension for analysis")
-        y_axis = st.selectbox("Y-axis", [None] + df.columns.tolist(),
-                            disabled=plot_type in ["Histogram", "Bar Chart"],
-                            help="Secondary dimension for analysis")
-        # Smart color encoding
-        color_options = ["None"] + df.columns.tolist()
-        color_by = st.selectbox("Color encoding", color_options,
-                              format_func=lambda x: "No color" if x == "None" else x)
-        # Context-aware controls
-        if plot_type in ["3D Scatter", "Parallel Categories"]:
-            z_axis = st.selectbox("Z-axis", [None] + df.columns.tolist())
         if plot_type == "Parallel Categories":
             dimensions = st.multiselect("Dimensions", df.columns.tolist(), default=df.columns[:3])
     with col2:
         try:
-            # Generate appropriate visualization
             if plot_type == "Scatter Plot":
-                fig = px.scatter(df, x=x_axis, y=y_axis, color=color_by if color_by != "None" else None,
-                               hover_data=df.columns, trendline="lowess")
             elif plot_type == "Histogram":
-                fig = px.histogram(df, x=x_axis, color=color_by if color_by != "None" else None,
-                                 nbins=30, marginal="box")
             elif plot_type == "Box Plot":
-                fig = px.box(df, x=x_axis, y=y_axis, color=color_by if color_by != "None" else None)
             elif plot_type == "Violin Plot":
-                fig = px.violin(df, x=x_axis, y=y_axis, color=color_by if color_by != "None" else None,
-                              box=True)
             elif plot_type == "Line Chart":
-                fig = px.line(df, x=x_axis, y=y_axis, color=color_by if color_by != "None" else None)
             elif plot_type == "Bar Chart":
-                fig = px.bar(df, x=x_axis, color=color_by if color_by != "None" else None)
             elif plot_type == "Correlation Matrix":
-                corr = df.select_dtypes(include=np.number).corr()
-                fig = px.imshow(corr, text_auto=True, color_continuous_scale='RdBu_r',
-                              zmin=-1, zmax=1)
             elif plot_type == "Pair Plot":
-                fig = px.scatter_matrix(df, dimensions=numeric_cols[:4],
-                                      color=color_by if color_by != "None" else None)
             elif plot_type == "Heatmap":
-                fig = px.density_heatmap(df, x=x_axis, y=y_axis, facet_col=color_by if color_by != "None" else None)
             elif plot_type == "3D Scatter":
-                fig = px.scatter_3d(df, x=x_axis, y=y_axis, z=z_axis,
-                                  color=color_by if color_by != "None" else None)
             elif plot_type == "Parallel Categories":
-                fig = px.parallel_categories(df, dimensions=dimensions,
-                                           color=color_by if color_by != "None" else None)
             # Interactive plot customization
-            with st.expander("⚙️ Chart Settings", expanded=False):
-                col1, col2 = st.columns(2)
-                with col1:
-                    chart_title = st.text_input("Chart title", f"{plot_type} of {x_axis} vs {y_axis}")
-                    fig.update_layout(title=chart_title)
-                with col2:
-                    theme = st.selectbox("Color theme", px.colors.named_colorscales())
-                    fig.update_layout(colorway=px.colors.qualitative.Plotly)
-            st.plotly_chart(fig, use_container_width=True)
         except Exception as e:
             st.error(f"Couldn't create visualization: {str(e)}")
             st.info("Try selecting different columns or changing the visualization type")
     # --------------------------
     # Advanced Analysis
     # --------------------------
     with st.expander("🔬 Deep Analysis Tools", expanded=False):
         tab1, tab2, tab3 = st.tabs(["Statistical Tests", "Pattern Explorer", "Data Transformation"])
         with tab1:
             st.subheader("Hypothesis Testing")
             col1, col2 = st.columns(2)
@@ -834,12 +810,15 @@ elif app_mode == "EDA":
                 test_var = st.selectbox("Test variable", numeric_cols)
             with col2:
                 group_var = st.selectbox("Grouping variable", [None] + categorical_cols)
             if group_var and st.button("Run ANOVA"):
-                groups = df.groupby(group_var)[test_var].apply(list)
-                f_val, p_val = stats.f_oneway(*groups)
-                st.write(f"F-value: {f_val:.2f}, p-value: {p_val:.4f}")
         with tab2:
             st.subheader("Pattern Discovery")
             explore_col = st.selectbox("Column to analyze", df.columns)
@@ -848,7 +827,7 @@ elif app_mode == "EDA":
                 if pattern:
                     matches = df[explore_col].str.contains(pattern).sum()
                     st.write(f"Found {matches} matches")
         with tab3:
             st.subheader("Data Transformation")
             transform_col = st.selectbox("Column to transform", numeric_cols)
@@ -859,21 +838,28 @@ elif app_mode == "EDA":
                 df[transform_col] = np.sqrt(df[transform_col])
             elif transform_type == "Z-score":
                 df[transform_col] = (df[transform_col] - df[transform_col].mean())/df[transform_col].std()
     # --------------------------
     # Export & Save
     # --------------------------
     st.subheader("💾 Export Options")
     col1, col2 = st.columns(2)
     with col1:
         if st.button("📥 Download Current Visualization"):
-            fig.write_image("visualization.png")
-            st.success("Image saved!")
     with col2:
         if st.button("📊 Export Analysis Report"):
-            profile = ProfileReport(df, minimal=True)
-            profile.to_file("analysis_report.html")
-            st.success("Report generated!")
 # Streamlit App
 elif app_mode == "Model Training":

 )
 # --------------------------
 # Custom Styling
 # --------------------------
 # --------------------------
 # Helper Functions
 # --------------------------
+def enhance_section_title(title, icon="✨"):
+    """Helper function to create a styled section title with an icon."""
+    st.markdown(f"<h2 style='border-bottom: 2px solid #ccc; padding-bottom: 5px;'>{icon} {title}</h2>", unsafe_allow_html=True)
+def update_cleaned_data(df):
+    """Updates the cleaned data in session state."""
+    st.session_state.cleaned_data = df
+    st.session_state.data_versions.append(df.copy())  # Append to history
+    st.success("Action completed successfully!")
 def generate_quality_report(df):
     """Generate comprehensive data quality report"""
     report = {
         except Exception as e:
             st.error(f"Error loading file: {str(e)}")
+# --------------------------
+# Page Content
+# --------------------------
+if st.session_state.get("app_mode") == "Data Cleaning":
     st.title("🧹 Smart Data Cleaning")
     if st.session_state.raw_data is None:
         st.warning("Please upload data first")
         st.stop()
+    # Initialize session state (only if it's not already there)
     if 'data_versions' not in st.session_state:
         st.session_state.data_versions = [st.session_state.raw_data.copy()]
+    if 'cleaned_data' not in st.session_state: #Added a conditional value
+        st.session_state.cleaned_data = st.session_state.raw_data.copy()
+    df = st.session_state.cleaned_data.copy()
     # --------------------------
     # Data Health Dashboard
     # --------------------------
+    enhance_section_title("Data Health Dashboard", "📊")
     with st.expander("📊 Data Health Dashboard", expanded=True):
         col1, col2, col3 = st.columns(3)
         with col1:
             st.metric("Total Rows", len(df))
         with col3:
             st.metric("Missing Values", df.isna().sum().sum())
         # Generate quick profile report
         if st.button("Generate Data Health Report"):
             with st.spinner("Analyzing data..."):
                 profile = ProfileReport(df, minimal=True)
                 st_profile_report(profile)
     # --------------------------
     # Undo Functionality
     # --------------------------
     if len(st.session_state.data_versions) > 1:
         if st.button("⏮️ Undo Last Action"):
+            st.session_state.data_versions.pop()  # Remove current version
+            st.session_state.cleaned_data = st.session_state.data_versions[-1].copy() # Set data
             st.success("Last action undone!")
+            st.experimental_rerun() #Force re-run after undo
     # --------------------------
     # Missing Value Handling
     # --------------------------
+    enhance_section_title("Missing Values Treatment", "🔍")
     with st.expander("🔍 Missing Values Treatment", expanded=True):
         missing_cols = df.columns[df.isna().any()].tolist()
         if missing_cols:
             cols = st.multiselect("Select columns to handle", missing_cols)
             method = st.selectbox("Imputation Method", [
+                "Drop Missing",
+                "Mean/Median",
                 "Custom Value",
                 "Forward Fill",
                 "Backward Fill"
             ])
             if method == "Custom Value":
                 custom_val = st.text_input("Enter custom value")
+            if st.button("Apply Treatment (Missing)"):
                 try:
+                    new_df = df.copy()  # Create a copy to modify
                     if method == "Drop Missing":
+                        new_df = new_df.dropna(subset=cols)
                     elif method == "Mean/Median":
                         for col in cols:
+                            if pd.api.types.is_numeric_dtype(new_df[col]):
+                                new_df[col] = new_df[col].fillna(new_df[col].median())
                             else:
+                                new_df[col] = new_df[col].fillna(new_df[col].mode()[0])
                     elif method == "Custom Value" and custom_val:
                         for col in cols:
+                            new_df[col] = new_df[col].fillna(custom_val)
                     elif method == "Forward Fill":
+                        new_df[cols] = new_df[cols].ffill()
                     elif method == "Backward Fill":
+                        new_df[cols] = new_df[cols].bfill()
+                    update_cleaned_data(new_df)
+                    st.experimental_rerun() #Force re-run after apply
                 except Exception as e:
                     st.error(f"Error: {str(e)}")
         else:
             st.success("✨ No missing values found!")
     # --------------------------
     # Data Type Conversion
     # --------------------------
+    enhance_section_title("Data Type Conversion", "🔄")
     with st.expander("🔄 Data Type Conversion"):
         col_to_convert = st.selectbox("Select column", df.columns)
         new_type = st.selectbox("New data type", [
+            "String", "Integer", "Float",
             "Boolean", "Datetime"
         ])
         if new_type == "Datetime":
             date_format = st.text_input("Date format (e.g. %Y-%m-%d)", "%Y-%m-%d")
+        if st.button("Convert (Data Type)"):
             try:
+                new_df = df.copy()
                 if new_type == "String":
+                    new_df[col_to_convert] = new_df[col_to_convert].astype(str)
                 elif new_type == "Integer":
+                    if new_df[col_to_convert].dtype == 'object':
                         st.error("Cannot convert text column to integer!")
                     else:
+                        new_df[col_to_convert] = pd.to_numeric(new_df[col_to_convert], errors='coerce').astype('Int64')
                 elif new_type == "Float":
+                    if new_df[col_to_convert].dtype == 'object':
                         st.error("Cannot convert text column to float!")
                     else:
+                        new_df[col_to_convert] = pd.to_numeric(new_df[col_to_convert], errors='coerce')
                 elif new_type == "Boolean":
+                    new_df[col_to_convert] = new_df[col_to_convert].astype(bool)
                 elif new_type == "Datetime":
+                    new_df[col_to_convert] = pd.to_datetime(new_df[col_to_convert], format=date_format, errors='coerce')
+                update_cleaned_data(new_df)
+                st.experimental_rerun() #Force re-run after apply
             except Exception as e:
                 st.error(f"Error: {str(e)}")
     # --------------------------
     # Drop Columns
     # --------------------------
+    enhance_section_title("Drop Columns", "🗑️")
     with st.expander("🗑️ Drop Columns"):
         columns_to_drop = st.multiselect("Select columns to drop", df.columns)
         if columns_to_drop:
             st.warning(f"Will drop: {', '.join(columns_to_drop)}")
+            if st.button("Confirm Drop (Columns)"):
+                new_df = df.copy()
+                new_df = new_df.drop(columns=columns_to_drop)
+                update_cleaned_data(new_df)
+                st.experimental_rerun() #Force re-run after apply
     # --------------------------
     # Label Encoding
     # --------------------------
+    enhance_section_title("Label Encoding", "🔢")
     with st.expander("🔢 Label Encoding"):
         data_to_encode = st.multiselect("Select categorical columns to encode", df.select_dtypes(include='object').columns)
         if data_to_encode:
+            if st.button("Apply Label Encoding (Encoding)"):
+                new_df = df.copy()
                 label_encoders = {}
                 for col in data_to_encode:
                     le = LabelEncoder()
+                    new_df[col] = le.fit_transform(new_df[col].astype(str))
                     label_encoders[col] = le
+                update_cleaned_data(new_df)
+                st.experimental_rerun() #Force re-run after apply
     # --------------------------
     # StandardScaler
     # --------------------------
+    enhance_section_title("StandardScaler", "📏")
     with st.expander("📏 StandardScaler"):
         scale_cols = st.multiselect("Select numeric columns to scale", df.select_dtypes(include=np.number).columns)
         if scale_cols:
+            if st.button("Apply StandardScaler (Scaling)"):
                 try:
+                    new_df = df.copy()
                     scaler = StandardScaler()
+                    new_df[scale_cols] = scaler.fit_transform(new_df[scale_cols])
+                    update_cleaned_data(new_df)
+                    st.experimental_rerun() #Force re-run after apply
                 except Exception as e:
                     st.error(f"Error: {str(e)}")
     # --------------------------
     # Pattern-Based Cleaning
     # --------------------------
+    enhance_section_title("Pattern-Based Cleaning", "🕵️")
     with st.expander("🕵️ Pattern-Based Cleaning"):
         selected_col = st.selectbox("Select text column", df.select_dtypes(include='object').columns)
         pattern = st.text_input("Regex pattern (e.g. \d+ for numbers)")
         replacement = st.text_input("Replacement value")
+        if st.button("Apply Pattern Replacement (Replace)"):
             try:
+                new_df = df.copy()
+                new_df[selected_col] = new_df[selected_col].str.replace(pattern, replacement, regex=True)
+                update_cleaned_data(new_df)
+                st.experimental_rerun() #Force re-run after apply
             except Exception as e:
                 st.error(f"Error: {str(e)}")
     # --------------------------
     # Bulk Operations
     # --------------------------
+    enhance_section_title("Bulk Actions", "🚀")
     with st.expander("🚀 Bulk Actions"):
+        if st.button("Auto-Clean Common Issues (Cleaning)"):
+            new_df = df.copy()
+            new_df = new_df.dropna(axis=1, how='all')  # Remove empty cols
+            new_df = new_df.convert_dtypes()  # Better type inference
+            text_cols = new_df.select_dtypes(include='object').columns
+            new_df[text_cols] = new_df[text_cols].apply(lambda x: x.str.strip())
+            update_cleaned_data(new_df)
+            st.experimental_rerun() #Force re-run after apply
     # --------------------------
     # Cleaned Data Preview
     # --------------------------
+    if st.session_state.get("cleaned_data") is not None:
+        enhance_section_title("Cleaned Data Preview", "✨")
         with st.expander("✨ Cleaned Data Preview", expanded=True):
+            st.dataframe(st.session_state.cleaned_data.head(), use_container_width=True)
+import streamlit as st
+import pandas as pd
+import numpy as np
+import plotly.express as px
+from scipy import stats  # For statistical tests
+from pandas_profiling import ProfileReport  # Automated EDA (if you have it installed)
+# --------------------------
+# Helper Functions
+# --------------------------
+def enhance_section_title(title, icon="✨"):
+    """Helper function to create a styled section title with an icon."""
+    st.markdown(f"<h2 style='border-bottom: 2px solid #ccc; padding-bottom: 5px;'>{icon} {title}</h2>", unsafe_allow_html=True)
+# --------------------------
+# Page Content
+# --------------------------
+if st.session_state.get("app_mode") == "EDA":
     st.title("🔍 Interactive Data Explorer")
     if st.session_state.cleaned_data is None:
         st.warning("Please clean your data first")
         st.stop()
     df = st.session_state.cleaned_data
     # --------------------------
     # Enhanced Data Overview
     # --------------------------
+    enhance_section_title("Dataset Overview", "📁")
     with st.expander("📁 Dataset Overview", expanded=True):
         col1, col2, col3, col4 = st.columns(4)
         with col1:
         with col4:
             dupes = df.duplicated().sum()
             st.metric("Duplicates", dupes, help="Fully duplicated rows")
         # Data Preview Tabs
         tab1, tab2, tab3 = st.tabs(["Quick Preview", "Column Types", "Missing Matrix"])
         with tab1:
         with tab3:
             fig = px.imshow(df.isna(), color_continuous_scale='gray')
             st.plotly_chart(fig, use_container_width=True)
     # --------------------------
     # Smart Visualization Builder
     # --------------------------
+    enhance_section_title("Visualization Builder", "📊")
     st.subheader("📊 Visualization Builder")
     # Automatic plot type suggestions
     numeric_cols = df.select_dtypes(include=np.number).columns.tolist()
     categorical_cols = df.select_dtypes(exclude=np.number).columns.tolist()
     col1, col2 = st.columns([1, 3])
     with col1:
         # Dynamic plot type filtering
             index=0,
             help="Automatically filtered based on data types"
         )
+        # Axis selection - conditionally displayed
+        x_axis = None
+        y_axis = None
+        z_axis = None
+        color_by = "None" # Default color to None
+        if plot_type not in ["Correlation Matrix", "Pair Plot"]:
+            x_axis = st.selectbox("X-axis", df.columns, help="Primary dimension for analysis")
+        if plot_type in ["Scatter Plot", "Box Plot", "Violin Plot", "Line Chart", "Heatmap"]:
+            y_axis = st.selectbox("Y-axis", df.columns, help="Secondary dimension for analysis")
+        if plot_type == "3D Scatter":
+            z_axis = st.selectbox("Z-axis", df.columns, help="Third dimension for analysis")
+        # Color encoding
+        if plot_type not in ["Correlation Matrix", "Pair Plot"]:
+            color_options = ["None"] + df.columns.tolist()
+            color_by = st.selectbox("Color encoding", color_options,
+                                  format_func=lambda x: "No color" if x == "None" else x)
+        # Context-aware controls for Parallel Categories
+        dimensions = None
         if plot_type == "Parallel Categories":
             dimensions = st.multiselect("Dimensions", df.columns.tolist(), default=df.columns[:3])
     with col2:
         try:
+            fig = None  # Initialize fig to None
+            # Generate appropriate visualization with input validation
             if plot_type == "Scatter Plot":
+                if x_axis and y_axis:
+                    fig = px.scatter(df, x=x_axis, y=y_axis, color=color_by if color_by != "None" else None,
+                                   hover_data=df.columns, trendline="lowess")
             elif plot_type == "Histogram":
+                if x_axis:
+                    fig = px.histogram(df, x=x_axis, color=color_by if color_by != "None" else None,
+                                     nbins=30, marginal="box")
             elif plot_type == "Box Plot":
+                if x_axis and y_axis:
+                    fig = px.box(df, x=x_axis, y=y_axis, color=color_by if color_by != "None" else None)
             elif plot_type == "Violin Plot":
+                if x_axis and y_axis:
+                    fig = px.violin(df, x=x_axis, y=y_axis, color=color_by if color_by != "None" else None,
+                                  box=True)
             elif plot_type == "Line Chart":
+                if x_axis and y_axis:
+                    fig = px.line(df, x=x_axis, y=y_axis, color=color_by if color_by != "None" else None)
             elif plot_type == "Bar Chart":
+                if x_axis:
+                    fig = px.bar(df, x=x_axis, color=color_by if color_by != "None" else None)
             elif plot_type == "Correlation Matrix":
+                numeric_df = df.select_dtypes(include=np.number)
+                if len(numeric_df.columns) > 1:
+                    corr = numeric_df.corr()
+                    fig = px.imshow(corr, text_auto=True, color_continuous_scale='RdBu_r',
+                                  zmin=-1, zmax=1)
             elif plot_type == "Pair Plot":
+                numeric_df = df.select_dtypes(include=np.number)
+                num_cols = len(numeric_df.columns)
+                if num_cols > 1:
+                    dimensions = numeric_df.columns[:min(4, num_cols)].tolist()  # Limit to the first 4 for performance
+                    fig = px.scatter_matrix(df, dimensions=dimensions,
+                                          color=color_by if color_by != "None" else None)
             elif plot_type == "Heatmap":
+                if x_axis and y_axis:
+                    fig = px.density_heatmap(df, x=x_axis, y=y_axis, facet_col=color_by if color_by != "None" else None)
             elif plot_type == "3D Scatter":
+                if x_axis and y_axis and z_axis:
+                    fig = px.scatter_3d(df, x=x_axis, y=y_axis, z=z_axis,
+                                      color=color_by if color_by != "None" else None)
             elif plot_type == "Parallel Categories":
+                if dimensions:
+                    fig = px.parallel_categories(df, dimensions=dimensions,
+                                               color=color_by if color_by != "None" else None)
+            else:
+                st.error("Please choose the specific plot")
             # Interactive plot customization
+            if fig: #Only display customization options when we have a plot
+                with st.expander("⚙️ Chart Settings", expanded=False):
+                    col1, col2 = st.columns(2)
+                    with col1:
+                        chart_title = st.text_input("Chart title", f"{plot_type} of {x_axis} vs {y_axis}" if (x_axis and y_axis) else f"{plot_type} of {x_axis}" if x_axis else plot_type)
+                        fig.update_layout(title=chart_title)
+                    with col2:
+                        theme = st.selectbox("Color theme", px.colors.named_colorscales())
+                        fig.update_layout(colorway=px.colors.qualitative.Plotly)
+                st.plotly_chart(fig, use_container_width=True)
         except Exception as e:
             st.error(f"Couldn't create visualization: {str(e)}")
             st.info("Try selecting different columns or changing the visualization type")
     # --------------------------
     # Advanced Analysis
     # --------------------------
+    enhance_section_title("Deep Analysis Tools", "🔬")
     with st.expander("🔬 Deep Analysis Tools", expanded=False):
         tab1, tab2, tab3 = st.tabs(["Statistical Tests", "Pattern Explorer", "Data Transformation"])
         with tab1:
             st.subheader("Hypothesis Testing")
             col1, col2 = st.columns(2)
                 test_var = st.selectbox("Test variable", numeric_cols)
             with col2:
                 group_var = st.selectbox("Grouping variable", [None] + categorical_cols)
             if group_var and st.button("Run ANOVA"):
+                if test_var and group_var:
+                    groups = df.groupby(group_var)[test_var].apply(list)
+                    f_val, p_val = stats.f_oneway(*groups)
+                    st.write(f"F-value: {f_val:.2f}, p-value: {p_val:.4f}")
+                else:
+                    st.warning("Please select both a Test variable and a Grouping variable for ANOVA.")
         with tab2:
             st.subheader("Pattern Discovery")
             explore_col = st.selectbox("Column to analyze", df.columns)
                 if pattern:
                     matches = df[explore_col].str.contains(pattern).sum()
                     st.write(f"Found {matches} matches")
         with tab3:
             st.subheader("Data Transformation")
             transform_col = st.selectbox("Column to transform", numeric_cols)
                 df[transform_col] = np.sqrt(df[transform_col])
             elif transform_type == "Z-score":
                 df[transform_col] = (df[transform_col] - df[transform_col].mean())/df[transform_col].std()
     # --------------------------
     # Export & Save
     # --------------------------
+    enhance_section_title("Export Options", "💾")
     st.subheader("💾 Export Options")
     col1, col2 = st.columns(2)
     with col1:
         if st.button("📥 Download Current Visualization"):
+            try:
+                fig.write_image("visualization.png")
+                st.success("Image saved!")
+            except NameError:
+                st.error("No visualization to download. Please create a chart first.")
     with col2:
         if st.button("📊 Export Analysis Report"):
+            try:
+                profile = ProfileReport(df, minimal=True)
+                profile.to_file("analysis_report.html")
+                st.success("Report generated!")
+            except Exception as e:
+                st.error(f"Could not generate analysis report. Ensure pandas-profiling is installed correctly.")
 # Streamlit App
 elif app_mode == "Model Training":