Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -261,68 +261,92 @@ if app_mode == "Data Upload":
|
|
261 |
|
262 |
elif app_mode == "Smart Cleaning":
|
263 |
st.title("🧼 Intelligent Data Cleaning")
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
326 |
elif app_mode == "Advanced EDA":
|
327 |
st.title("🔍 Advanced Exploratory Analysis")
|
328 |
|
|
|
261 |
|
262 |
elif app_mode == "Smart Cleaning":
|
263 |
st.title("🧼 Intelligent Data Cleaning")
|
264 |
+
# Smart Cleaning page body.
#
# Left column: widgets that choose one cleaning operation and its options.
# Right column: an "Apply Transformation" button that runs the chosen
# operation on a copy of the cleaned frame and, when it succeeds, stores the
# result back in ``st.session_state.cleaned_data``.
if st.session_state.raw_data is not None:
    df = st.session_state.cleaned_data

    # Cleaning Toolkit
    col1, col2 = st.columns([1, 3])
    with col1:
        st.subheader("Cleaning Actions")

        clean_action = st.selectbox("Choose Operation", [
            "Handle Missing Values",
            "Clean Text",
            # ... other cleaning operations ...
        ])

        if clean_action == "Handle Missing Values":
            columns_with_missing = df.columns[df.isnull().any()].tolist()
            column_to_impute = st.selectbox("Column to Impute", ["All Columns"] + columns_with_missing)

            method = st.selectbox("Imputation Method", [
                "KNN Imputation",
                "Median Fill",
                "Mean Fill",
                "Drop Missing",
                "Constant Value Fill"
            ])
            if method == "KNN Imputation":
                knn_neighbors = st.slider("KNN Neighbors", 2, 10, 5)
            elif method == "Constant Value Fill":
                constant_value = st.text_input("Constant Value")

        elif clean_action == "Clean Text":
            text_column = st.selectbox("Text Column", df.select_dtypes(include='object').columns)
            cleaning_operation = st.selectbox("Cleaning Operation", ["Remove Special Characters", "Lowercase", "Uppercase", "Remove Extra Spaces"])
            # Always bind the pattern: the apply step reads it for every
            # operation, so leaving it unset raised NameError for the
            # non-regex operations.
            chars_to_remove = r'[^a-zA-Z0-9\s]'
            if cleaning_operation == "Remove Special Characters":
                chars_to_remove = st.text_input("Characters to Remove", r'[^a-zA-Z0-9\s]')

    with col2:
        if st.button("Apply Transformation"):
            with st.spinner("Applying changes..."):
                current_df = df.copy()
                # ... (your data history logic) ...

                # Flipped to False whenever the request cannot be carried
                # out, so the stored frame and the success message only
                # change after a real transformation.
                applied = True

                if clean_action == "Handle Missing Values":
                    single_column = column_to_impute != "All Columns"
                    numeric_cols = current_df.select_dtypes(include="number").columns.tolist()
                    needs_numeric = method in ("KNN Imputation", "Median Fill", "Mean Fill")

                    if needs_numeric and single_column and column_to_impute not in numeric_cols:
                        st.warning(f"{method} needs a numeric column; '{column_to_impute}' is not numeric.")
                        applied = False
                    elif needs_numeric and not numeric_cols:
                        st.warning(f"{method} needs at least one numeric column.")
                        applied = False
                    elif method == "KNN Imputation":
                        # Columns without a single observed value are left
                        # out: the imputer cannot fill them and would return
                        # fewer columns than it was given.
                        knn_cols = [c for c in numeric_cols if current_df[c].notna().any()]
                        if not knn_cols or (single_column and column_to_impute not in knn_cols):
                            st.warning("KNN imputation needs at least one observed value in the target column.")
                            applied = False
                        else:
                            imputer = KNNImputer(n_neighbors=knn_neighbors)
                            # Fit on every usable numeric column, even when
                            # only one is being filled: a lone column gives
                            # the imputer no neighbouring features to
                            # measure distance with. The original index is
                            # kept so the write-back aligns row for row.
                            imputed = pd.DataFrame(
                                imputer.fit_transform(current_df[knn_cols]),
                                columns=knn_cols,
                                index=current_df.index,
                            )
                            targets = [column_to_impute] if single_column else knn_cols
                            current_df[targets] = imputed[targets]
                    elif method in ("Median Fill", "Mean Fill"):
                        # Restrict to numeric columns; median()/mean() over
                        # text columns raises on current pandas.
                        targets = [column_to_impute] if single_column else numeric_cols
                        selected = current_df[targets]
                        fill_stats = selected.median() if method == "Median Fill" else selected.mean()
                        current_df[targets] = selected.fillna(fill_stats)
                    elif method == "Constant Value Fill":

                        def _typed_constant(series, raw):
                            """Return *raw* as a float for numeric columns when it parses, else unchanged."""
                            if pd.api.types.is_numeric_dtype(series):
                                try:
                                    return float(raw)
                                except ValueError:
                                    return raw
                            return raw

                        # The text input always yields a string; convert it
                        # per column so numeric columns stay numeric.
                        targets = [column_to_impute] if single_column else current_df.columns.tolist()
                        fill_values = {col: _typed_constant(current_df[col], constant_value) for col in targets}
                        current_df = current_df.fillna(value=fill_values)
                    else:
                        # "Drop Missing": honour the selected column instead
                        # of always dropping rows with a gap anywhere.
                        subset = [column_to_impute] if single_column else None
                        current_df = current_df.dropna(subset=subset)

                elif clean_action == "Clean Text":
                    import re  # local import: only this branch needs it

                    def clean_text(text, operation, chars_to_remove=r'[^a-zA-Z0-9\s]'):
                        """Apply one named cleaning operation to a single value.

                        The value is converted with ``str()`` first. An
                        unrecognised *operation* returns the value unchanged.
                        *chars_to_remove* is a regex pattern whose matches are
                        deleted by "Remove Special Characters".
                        """
                        if operation == "Remove Special Characters":
                            text = re.sub(chars_to_remove, '', str(text))
                        elif operation == "Lowercase":
                            text = str(text).lower()
                        elif operation == "Uppercase":
                            text = str(text).upper()
                        elif operation == "Remove Extra Spaces":
                            text = " ".join(str(text).split())
                        return text

                    if text_column is None:
                        # The selectbox yields None when there are no
                        # object-dtype columns to offer.
                        st.warning("No text columns available to clean.")
                        applied = False
                    else:
                        try:
                            # Validate the user-supplied pattern once, up
                            # front, instead of failing inside the per-row
                            # callback.
                            re.compile(chars_to_remove)
                        except re.error as exc:
                            st.error(f"Invalid pattern for 'Characters to Remove': {exc}")
                            applied = False
                        else:
                            # na_action="ignore" keeps missing values
                            # missing rather than turning them into the
                            # string "nan".
                            current_df[text_column] = current_df[text_column].map(
                                lambda value: clean_text(value, cleaning_operation, chars_to_remove),
                                na_action="ignore",
                            )

                if applied:
                    st.session_state.cleaned_data = current_df
                    st.success("Transformation applied!")
|
349 |
+
|
350 |
elif app_mode == "Advanced EDA":
|
351 |
st.title("🔍 Advanced Exploratory Analysis")
|
352 |
|