Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -273,6 +273,7 @@ elif app_mode == "Smart Cleaning":
|
|
273 |
clean_action = st.selectbox("Choose Operation", [
|
274 |
"Handle Missing Values",
|
275 |
"Clean Text",
|
|
|
276 |
# ... other cleaning operations ...
|
277 |
])
|
278 |
|
@@ -298,7 +299,13 @@ elif app_mode == "Smart Cleaning":
|
|
298 |
if cleaning_operation == "Remove Special Characters":
|
299 |
chars_to_remove = st.text_input("Characters to Remove", r'[^a-zA-Z0-9\s]')
|
300 |
|
|
|
|
|
|
|
301 |
with col2:
|
|
|
|
|
|
|
302 |
if st.button("Apply Transformation"):
|
303 |
with st.spinner("Applying changes..."):
|
304 |
current_df = df.copy()
|
@@ -330,7 +337,7 @@ elif app_mode == "Smart Cleaning":
|
|
330 |
current_df = current_df.dropna()
|
331 |
|
332 |
elif clean_action == "Clean Text":
|
333 |
-
import re
|
334 |
|
335 |
def clean_text(text, operation, chars_to_remove=r'[^a-zA-Z0-9\s]'):
|
336 |
if operation == "Remove Special Characters":
|
@@ -345,9 +352,15 @@ elif app_mode == "Smart Cleaning":
|
|
345 |
|
346 |
current_df[text_column] = current_df[text_column].astype(str).apply(lambda x: clean_text(x, cleaning_operation, chars_to_remove))
|
347 |
|
|
|
|
|
|
|
348 |
st.session_state.cleaned_data = current_df
|
349 |
st.success("Transformation applied!")
|
350 |
-
|
|
|
|
|
|
|
351 |
elif app_mode == "Advanced EDA":
|
352 |
st.title("🔍 Advanced Exploratory Analysis")
|
353 |
|
@@ -594,12 +607,15 @@ elif app_mode == "Model Training":
|
|
594 |
|
595 |
feature_selection_method = st.selectbox("Feature Selection Method", ["None", "SelectKBest"])
|
596 |
|
597 |
-
if model_name == "Random Forest":
|
|
|
|
|
598 |
param_grid = {
|
599 |
'n_estimators': st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the forest."),
|
600 |
'max_depth': st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree."),
|
601 |
'min_samples_split': st.slider("Minimum Samples Split", 2, 10, 2, help="Minimum samples required to split an internal node"), #New hyperparameter
|
602 |
'min_samples_leaf': st.slider("Minimum Samples Leaf", 1, 10, 1, help="Minimum samples required to be at a leaf node"), #New hyperparameter
|
|
|
603 |
}
|
604 |
|
605 |
# Train-Test Split
|
@@ -610,6 +626,12 @@ elif app_mode == "Model Training":
|
|
610 |
try:
|
611 |
X = df[feature_columns]
|
612 |
y = df[target_column]
|
|
|
|
|
|
|
|
|
|
|
|
|
613 |
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
|
614 |
|
615 |
# Preprocessing Pipeline
|
@@ -658,16 +680,23 @@ elif app_mode == "Model Training":
|
|
658 |
elif model_name == "Random Forest":
|
659 |
if problem_type == "Regression":
|
660 |
model = RandomForestRegressor(random_state=42)
|
661 |
-
|
662 |
-
|
663 |
-
|
664 |
-
|
|
|
|
|
|
|
|
|
665 |
else:
|
666 |
model = RandomForestClassifier(random_state=42)
|
667 |
-
|
668 |
-
|
669 |
-
|
670 |
-
|
|
|
|
|
|
|
671 |
|
672 |
elif model_name == "Gradient Boosting":
|
673 |
model = GradientBoostingRegressor() if problem_type == "Regression" else GradientBoostingClassifier()
|
@@ -721,16 +750,17 @@ elif app_mode == "Model Training":
|
|
721 |
st.error(f"Error loading model: {e}")
|
722 |
|
723 |
#Model Evaluation Section
|
724 |
-
|
725 |
-
|
726 |
-
|
727 |
-
|
728 |
-
|
729 |
-
|
730 |
-
|
731 |
-
|
732 |
-
|
733 |
-
|
|
|
734 |
|
735 |
elif app_mode == "Predictions":
|
736 |
st.title("🔮 Make Predictions")
|
|
|
273 |
clean_action = st.selectbox("Choose Operation", [
|
274 |
"Handle Missing Values",
|
275 |
"Clean Text",
|
276 |
+
"Remove Columns", # New option
|
277 |
# ... other cleaning operations ...
|
278 |
])
|
279 |
|
|
|
299 |
if cleaning_operation == "Remove Special Characters":
|
300 |
chars_to_remove = st.text_input("Characters to Remove", r'[^a-zA-Z0-9\s]')
|
301 |
|
302 |
+
elif clean_action == "Remove Columns":
|
303 |
+
remove_cols = st.multiselect("Columns to Remove", df.columns) # Multiselect for column removal
|
304 |
+
|
305 |
with col2:
|
306 |
+
st.subheader("Data Preview") # Added Data Preview Section
|
307 |
+
st.dataframe(df.head(10), use_container_width=True) # Display sample data
|
308 |
+
|
309 |
if st.button("Apply Transformation"):
|
310 |
with st.spinner("Applying changes..."):
|
311 |
current_df = df.copy()
|
|
|
337 |
current_df = current_df.dropna()
|
338 |
|
339 |
elif clean_action == "Clean Text":
|
340 |
+
import re # moved here since its only used here to avoid library bloat
|
341 |
|
342 |
def clean_text(text, operation, chars_to_remove=r'[^a-zA-Z0-9\s]'):
|
343 |
if operation == "Remove Special Characters":
|
|
|
352 |
|
353 |
current_df[text_column] = current_df[text_column].astype(str).apply(lambda x: clean_text(x, cleaning_operation, chars_to_remove))
|
354 |
|
355 |
+
elif clean_action == "Remove Columns":
|
356 |
+
current_df = current_df.drop(columns=remove_cols) # Drop selected columns
|
357 |
+
|
358 |
st.session_state.cleaned_data = current_df
|
359 |
st.success("Transformation applied!")
|
360 |
+
|
361 |
+
if st.button("Refresh Data Preview"): # Button to refresh data preview
|
362 |
+
st.experimental_rerun()
|
363 |
+
|
364 |
elif app_mode == "Advanced EDA":
|
365 |
st.title("🔍 Advanced Exploratory Analysis")
|
366 |
|
|
|
607 |
|
608 |
feature_selection_method = st.selectbox("Feature Selection Method", ["None", "SelectKBest"])
|
609 |
|
610 |
+
if model_name == "Random Forest" and feature_columns: # Check if Random Forest and features are selected
|
611 |
+
min_features = 1 # Ensure at least one feature is used
|
612 |
+
max_features = len(feature_columns) if len(feature_columns) > 0 else 1 # Use 1 if no features are selected
|
613 |
param_grid = {
|
614 |
'n_estimators': st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the forest."),
|
615 |
'max_depth': st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree."),
|
616 |
'min_samples_split': st.slider("Minimum Samples Split", 2, 10, 2, help="Minimum samples required to split an internal node"), #New hyperparameter
|
617 |
'min_samples_leaf': st.slider("Minimum Samples Leaf", 1, 10, 1, help="Minimum samples required to be at a leaf node"), #New hyperparameter
|
618 |
+
|
619 |
}
|
620 |
|
621 |
# Train-Test Split
|
|
|
626 |
try:
|
627 |
X = df[feature_columns]
|
628 |
y = df[target_column]
|
629 |
+
|
630 |
+
# Check if X is empty
|
631 |
+
if X.empty:
|
632 |
+
st.error("No features were selected. Please select feature columns.")
|
633 |
+
st.stop()
|
634 |
+
|
635 |
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
|
636 |
|
637 |
# Preprocessing Pipeline
|
|
|
680 |
elif model_name == "Random Forest":
|
681 |
if problem_type == "Regression":
|
682 |
model = RandomForestRegressor(random_state=42)
|
683 |
+
if 'param_grid' in locals():
|
684 |
+
grid_search = GridSearchCV(model, param_grid, cv=3, scoring='neg_mean_squared_error') # Example scoring
|
685 |
+
grid_search.fit(X_train_selected, y_train)
|
686 |
+
model = grid_search.best_estimator_
|
687 |
+
st.write("Best Parameters:", grid_search.best_params_)
|
688 |
+
else:
|
689 |
+
model.fit(X_train_selected, y_train) # fit without gridsearch if param_grid is not defined.
|
690 |
+
|
691 |
else:
|
692 |
model = RandomForestClassifier(random_state=42)
|
693 |
+
if 'param_grid' in locals():
|
694 |
+
grid_search = GridSearchCV(model, param_grid, cv=3, scoring='accuracy')
|
695 |
+
grid_search.fit(X_train_selected, y_train)
|
696 |
+
model = grid_search.best_estimator_
|
697 |
+
st.write("Best Parameters:", grid_search.best_params_)
|
698 |
+
else:
|
699 |
+
model.fit(X_train_selected, y_train) # fit without gridsearch if param_grid is not defined
|
700 |
|
701 |
elif model_name == "Gradient Boosting":
|
702 |
model = GradientBoostingRegressor() if problem_type == "Regression" else GradientBoostingClassifier()
|
|
|
750 |
st.error(f"Error loading model: {e}")
|
751 |
|
752 |
#Model Evaluation Section
|
753 |
+
if 'X_test' in locals() and st.session_state.model is not None:
|
754 |
+
y_pred = st.session_state.model.predict(X_test)
|
755 |
+
|
756 |
+
if problem_type == "Regression":
|
757 |
+
mse = mean_squared_error(y_test, y_pred)
|
758 |
+
r2 = r2_score(y_test, y_pred)
|
759 |
+
st.write(f"Mean Squared Error: {mse:.4f}")
|
760 |
+
st.write(f"R-squared: {r2:.4f}")
|
761 |
+
else:
|
762 |
+
accuracy = accuracy_score(y_test, y_pred)
|
763 |
+
st.write(f"Accuracy: {accuracy:.4f}")
|
764 |
|
765 |
elif app_mode == "Predictions":
|
766 |
st.title("🔮 Make Predictions")
|