CosmickVisions committed on
Commit
bde5851
·
verified ·
1 Parent(s): 68a3b7e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -62
app.py CHANGED
@@ -261,68 +261,92 @@ if app_mode == "Data Upload":
261
 
262
  elif app_mode == "Smart Cleaning":
263
  st.title("🧼 Intelligent Data Cleaning")
264
- elif clean_action == "Handle Missing Values":
265
- columns_with_missing = df.columns[df.isnull().any()].tolist()
266
- column_to_impute = st.selectbox("Column to Impute", ["All Columns"] + columns_with_missing) #Choose column
267
-
268
- method = st.selectbox("Imputation Method", [
269
- "KNN Imputation",
270
- "Median Fill",
271
- "Mean Fill",
272
- "Drop Missing",
273
- "Constant Value Fill" #new
274
- ])
275
- if method == "KNN Imputation":
276
- knn_neighbors = st.slider("KNN Neighbors", 2, 10, 5)
277
- elif method == "Constant Value Fill":
278
- constant_value = st.text_input("Constant Value")
279
-
280
- elif clean_action == "Clean Text":
281
- text_column = st.selectbox("Text Column", df.select_dtypes(include='object').columns)
282
- cleaning_operation = st.selectbox("Cleaning Operation", ["Remove Special Characters", "Lowercase", "Uppercase", "Remove Extra Spaces"])
283
- if cleaning_operation == "Remove Special Characters":
284
- chars_to_remove = st.text_input("Characters to Remove", r'[^a-zA-Z0-9\s]')
285
-
286
- #Inside the Apply Transformations button section
287
- elif clean_action == "Handle Missing Values":
288
- if method == "KNN Imputation":
289
- imputer = KNNImputer(n_neighbors=knn_neighbors)
290
- if column_to_impute == "All Columns":
291
- current_df = pd.DataFrame(imputer.fit_transform(current_df), columns=current_df.columns)
292
- else:
293
- current_df[[column_to_impute]] = imputer.fit_transform(current_df[[column_to_impute]])
294
- elif method == "Median Fill":
295
- if column_to_impute == "All Columns":
296
- current_df = current_df.fillna(current_df.median())
297
- else:
298
- current_df[column_to_impute] = current_df[column_to_impute].fillna(current_df[column_to_impute].median())
299
- elif method == "Mean Fill":
300
- if column_to_impute == "All Columns":
301
- current_df = current_df.fillna(current_df.mean())
302
- else:
303
- current_df[column_to_impute] = current_df[column_to_impute].fillna(current_df[column_to_impute].mean())
304
- elif method == "Constant Value Fill":
305
- if column_to_impute == "All Columns":
306
- current_df = current_df.fillna(constant_value)
307
- else:
308
- current_df[column_to_impute] = current_df[column_to_impute].fillna(constant_value)
309
- else:
310
- current_df = current_df.dropna()
311
-
312
- elif clean_action == "Clean Text":
313
- def clean_text(text, operation, chars_to_remove=r'[^a-zA-Z0-9\s]'):
314
- if operation == "Remove Special Characters":
315
- text = re.sub(chars_to_remove, '', str(text)) #Need to import re at top
316
- elif operation == "Lowercase":
317
- text = str(text).lower()
318
- elif operation == "Uppercase":
319
- text = str(text).upper()
320
- elif operation == "Remove Extra Spaces":
321
- text = " ".join(str(text).split())
322
- return text
323
-
324
- current_df[text_column] = current_df[text_column].apply(lambda x: clean_text(x, cleaning_operation, chars_to_remove))
325
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
326
  elif app_mode == "Advanced EDA":
327
  st.title("🔍 Advanced Exploratory Analysis")
328
 
 
261
 
262
  elif app_mode == "Smart Cleaning":
263
  st.title("🧼 Intelligent Data Cleaning")
264
+ if st.session_state.raw_data is not None:
265
+ df = st.session_state.cleaned_data
266
+
267
+ # Cleaning Toolkit
268
+ col1, col2 = st.columns([1, 3])
269
+ with col1:
270
+ st.subheader("Cleaning Actions")
271
+
272
+ clean_action = st.selectbox("Choose Operation", [
273
+ "Handle Missing Values",
274
+ "Clean Text",
275
+ # ... other cleaning operations ...
276
+ ])
277
+
278
+ if clean_action == "Handle Missing Values":
279
+ columns_with_missing = df.columns[df.isnull().any()].tolist()
280
+ column_to_impute = st.selectbox("Column to Impute", ["All Columns"] + columns_with_missing)
281
+
282
+ method = st.selectbox("Imputation Method", [
283
+ "KNN Imputation",
284
+ "Median Fill",
285
+ "Mean Fill",
286
+ "Drop Missing",
287
+ "Constant Value Fill"
288
+ ])
289
+ if method == "KNN Imputation":
290
+ knn_neighbors = st.slider("KNN Neighbors", 2, 10, 5)
291
+ elif method == "Constant Value Fill":
292
+ constant_value = st.text_input("Constant Value")
293
+
294
+ elif clean_action == "Clean Text":
295
+ text_column = st.selectbox("Text Column", df.select_dtypes(include='object').columns)
296
+ cleaning_operation = st.selectbox("Cleaning Operation", ["Remove Special Characters", "Lowercase", "Uppercase", "Remove Extra Spaces"])
297
+ if cleaning_operation == "Remove Special Characters":
298
+ chars_to_remove = st.text_input("Characters to Remove", r'[^a-zA-Z0-9\s]')
299
+
300
+ with col2:
301
+ if st.button("Apply Transformation"):
302
+ with st.spinner("Applying changes..."):
303
+ current_df = df.copy()
304
+ # ... (your data history logic) ...
305
+
306
+ if clean_action == "Handle Missing Values":
307
+ if method == "KNN Imputation":
308
+ imputer = KNNImputer(n_neighbors=knn_neighbors)
309
+ if column_to_impute == "All Columns":
310
+ current_df = pd.DataFrame(imputer.fit_transform(current_df), columns=current_df.columns)
311
+ else:
312
+ current_df[[column_to_impute]] = pd.DataFrame(imputer.fit_transform(current_df[[column_to_impute]]), columns=[column_to_impute])
313
+ elif method == "Median Fill":
314
+ if column_to_impute == "All Columns":
315
+ current_df = current_df.fillna(current_df.median())
316
+ else:
317
+ current_df[column_to_impute] = current_df[column_to_impute].fillna(current_df[column_to_impute].median())
318
+ elif method == "Mean Fill":
319
+ if column_to_impute == "All Columns":
320
+ current_df = current_df.fillna(current_df.mean())
321
+ else:
322
+ current_df[column_to_impute] = current_df[column_to_impute].fillna(current_df[column_to_impute].mean())
323
+ elif method == "Constant Value Fill":
324
+ if column_to_impute == "All Columns":
325
+ current_df = current_df.fillna(constant_value)
326
+ else:
327
+ current_df[column_to_impute] = current_df[column_to_impute].fillna(constant_value)
328
+ else:
329
+ current_df = current_df.dropna()
330
+
331
+ elif clean_action == "Clean Text":
332
+ import re #moved here since it's only used here to avoid library bloat
333
+
334
+ def clean_text(text, operation, chars_to_remove=r'[^a-zA-Z0-9\s]'):
335
+ if operation == "Remove Special Characters":
336
+ text = re.sub(chars_to_remove, '', str(text))
337
+ elif operation == "Lowercase":
338
+ text = str(text).lower()
339
+ elif operation == "Uppercase":
340
+ text = str(text).upper()
341
+ elif operation == "Remove Extra Spaces":
342
+ text = " ".join(str(text).split())
343
+ return text
344
+
345
+ current_df[text_column] = current_df[text_column].astype(str).apply(lambda x: clean_text(x, cleaning_operation, chars_to_remove))
346
+
347
+ st.session_state.cleaned_data = current_df
348
+ st.success("Transformation applied!")
349
+
350
  elif app_mode == "Advanced EDA":
351
  st.title("🔍 Advanced Exploratory Analysis")
352