Spaces:

CosmickVisions
/

Data-Vision

Sleeping

App Files Files Community

CosmickVisions committed on Feb 28

Commit

bde5851

verified ·

1 Parent(s): 68a3b7e

Update app.py

Browse files

Files changed (1) hide show

app.py +86 -62

app.py CHANGED Viewed

@@ -261,68 +261,92 @@ if app_mode == "Data Upload":
 elif app_mode == "Smart Cleaning":
     st.title("🧼 Intelligent Data Cleaning")
-    elif clean_action == "Handle Missing Values":
-    columns_with_missing = df.columns[df.isnull().any()].tolist()
-    column_to_impute = st.selectbox("Column to Impute", ["All Columns"] + columns_with_missing) #Choose column
-    method = st.selectbox("Imputation Method", [
-        "KNN Imputation",
-        "Median Fill",
-        "Mean Fill",
-        "Drop Missing",
-        "Constant Value Fill" #new
-    ])
-    if method == "KNN Imputation":
-        knn_neighbors = st.slider("KNN Neighbors", 2, 10, 5)
-    elif method == "Constant Value Fill":
-        constant_value = st.text_input("Constant Value")
-elif clean_action == "Clean Text":
-    text_column = st.selectbox("Text Column", df.select_dtypes(include='object').columns)
-    cleaning_operation = st.selectbox("Cleaning Operation", ["Remove Special Characters", "Lowercase", "Uppercase", "Remove Extra Spaces"])
-    if cleaning_operation == "Remove Special Characters":
-        chars_to_remove = st.text_input("Characters to Remove", r'[^a-zA-Z0-9\s]')
-#Inside the Apply Transformations button section
-elif clean_action == "Handle Missing Values":
-    if method == "KNN Imputation":
-        imputer = KNNImputer(n_neighbors=knn_neighbors)
-        if column_to_impute == "All Columns":
-            current_df = pd.DataFrame(imputer.fit_transform(current_df), columns=current_df.columns)
-        else:
-             current_df[[column_to_impute]] = imputer.fit_transform(current_df[[column_to_impute]])
-    elif method == "Median Fill":
-         if column_to_impute == "All Columns":
-            current_df = current_df.fillna(current_df.median())
-         else:
-            current_df[column_to_impute] = current_df[column_to_impute].fillna(current_df[column_to_impute].median())
-    elif method == "Mean Fill":
-        if column_to_impute == "All Columns":
-            current_df = current_df.fillna(current_df.mean())
-        else:
-            current_df[column_to_impute] = current_df[column_to_impute].fillna(current_df[column_to_impute].mean())
-    elif method == "Constant Value Fill":
-        if column_to_impute == "All Columns":
-             current_df = current_df.fillna(constant_value)
-        else:
-            current_df[column_to_impute] = current_df[column_to_impute].fillna(constant_value)
-    else:
-        current_df = current_df.dropna()
- elif clean_action == "Clean Text":
-      def clean_text(text, operation, chars_to_remove=r'[^a-zA-Z0-9\s]'):
-            if operation == "Remove Special Characters":
-                text = re.sub(chars_to_remove, '', str(text)) #Need to import re at top
-            elif operation == "Lowercase":
-                text = str(text).lower()
-            elif operation == "Uppercase":
-                text = str(text).upper()
-            elif operation == "Remove Extra Spaces":
-                text = " ".join(str(text).split())
-            return text
-      current_df[text_column] = current_df[text_column].apply(lambda x: clean_text(x, cleaning_operation, chars_to_remove))
 elif app_mode == "Advanced EDA":
     st.title("🔍 Advanced Exploratory Analysis")

 elif app_mode == "Smart Cleaning":
     st.title("🧼 Intelligent Data Cleaning")
+    if st.session_state.raw_data is not None:
+        # Start from the current cleaned frame; the result of each applied
+        # transformation is written back to st.session_state.cleaned_data below.
+        df = st.session_state.cleaned_data
+        # Bound up front: the apply handler passes this to clean_text for every
+        # text operation, but the input widget is only rendered for
+        # "Remove Special Characters". Without a default, the other operations
+        # raise NameError.
+        chars_to_remove = r'[^a-zA-Z0-9\s]'
+        # Cleaning Toolkit
+        col1, col2 = st.columns([1, 3])
+        with col1:
+            st.subheader("Cleaning Actions")
+            clean_action = st.selectbox("Choose Operation", [
+                "Handle Missing Values",
+                "Clean Text",
+                # ... other cleaning operations ...
+            ])
+            if clean_action == "Handle Missing Values":
+                columns_with_missing = df.columns[df.isnull().any()].tolist()
+                column_to_impute = st.selectbox("Column to Impute", ["All Columns"] + columns_with_missing)
+                method = st.selectbox("Imputation Method", [
+                    "KNN Imputation",
+                    "Median Fill",
+                    "Mean Fill",
+                    "Drop Missing",
+                    "Constant Value Fill"
+                ])
+                if method == "KNN Imputation":
+                    knn_neighbors = st.slider("KNN Neighbors", 2, 10, 5)
+                elif method == "Constant Value Fill":
+                    # NOTE(review): text_input returns a string, so numeric
+                    # columns are filled with text -- confirm this is intended.
+                    constant_value = st.text_input("Constant Value")
+            elif clean_action == "Clean Text":
+                text_column = st.selectbox("Text Column", df.select_dtypes(include='object').columns)
+                cleaning_operation = st.selectbox("Cleaning Operation", ["Remove Special Characters", "Lowercase", "Uppercase", "Remove Extra Spaces"])
+                if cleaning_operation == "Remove Special Characters":
+                    chars_to_remove = st.text_input("Characters to Remove", r'[^a-zA-Z0-9\s]')
+        with col2:
+            if st.button("Apply Transformation"):
+                with st.spinner("Applying changes..."):
+                    # Operate on a copy so the stored frame is only replaced
+                    # once the whole transformation has succeeded.
+                    current_df = df.copy()
+                    # ... (your data history logic) ...
+                    if clean_action == "Handle Missing Values":
+                        impute_all = column_to_impute == "All Columns"
+                        if method == "KNN Imputation":
+                            imputer = KNNImputer(n_neighbors=knn_neighbors)
+                            # KNN needs numeric input, so "All Columns" is
+                            # restricted to the numeric columns and text
+                            # columns are left untouched.
+                            if impute_all:
+                                target_cols = list(current_df.select_dtypes(include='number').columns)
+                            else:
+                                target_cols = [column_to_impute]
+                            if target_cols:
+                                # Assign the raw ndarray: wrapping it in a new
+                                # DataFrame would attach a fresh RangeIndex and
+                                # misalign rows whenever the frame's index is
+                                # not 0..n-1 (e.g. after "Drop Missing").
+                                current_df[target_cols] = imputer.fit_transform(current_df[target_cols])
+                        elif method == "Median Fill":
+                            if impute_all:
+                                # numeric_only avoids a TypeError on frames that
+                                # also contain text columns.
+                                current_df = current_df.fillna(current_df.median(numeric_only=True))
+                            else:
+                                current_df[column_to_impute] = current_df[column_to_impute].fillna(current_df[column_to_impute].median())
+                        elif method == "Mean Fill":
+                            if impute_all:
+                                current_df = current_df.fillna(current_df.mean(numeric_only=True))
+                            else:
+                                current_df[column_to_impute] = current_df[column_to_impute].fillna(current_df[column_to_impute].mean())
+                        elif method == "Constant Value Fill":
+                            if impute_all:
+                                current_df = current_df.fillna(constant_value)
+                            else:
+                                current_df[column_to_impute] = current_df[column_to_impute].fillna(constant_value)
+                        else:
+                            # "Drop Missing": honour the column selection
+                            # instead of always dropping on any missing value.
+                            if impute_all:
+                                current_df = current_df.dropna()
+                            else:
+                                current_df = current_df.dropna(subset=[column_to_impute])
+                    elif clean_action == "Clean Text":
+                        import re  # local import: only this branch needs it
+                        def clean_text(text, operation, chars_to_remove=r'[^a-zA-Z0-9\s]'):
+                            """Return `text` as a string with one cleaning operation applied.
+
+                            `operation` is one of the "Cleaning Operation" choices;
+                            `chars_to_remove` is a regex used only by
+                            "Remove Special Characters". Unknown operations
+                            return the text unchanged.
+                            """
+                            if operation == "Remove Special Characters":
+                                text = re.sub(chars_to_remove, '', str(text))
+                            elif operation == "Lowercase":
+                                text = str(text).lower()
+                            elif operation == "Uppercase":
+                                text = str(text).upper()
+                            elif operation == "Remove Extra Spaces":
+                                text = " ".join(str(text).split())
+                            return text
+                        # Only touch real values: stringifying the whole column
+                        # would turn missing entries into the literal "nan".
+                        has_value = current_df[text_column].notna()
+                        current_df.loc[has_value, text_column] = current_df.loc[has_value, text_column].apply(lambda x: clean_text(x, cleaning_operation, chars_to_remove))
+                    st.session_state.cleaned_data = current_df
+                    st.success("Transformation applied!")
 elif app_mode == "Advanced EDA":
     st.title("🔍 Advanced Exploratory Analysis")