Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -353,13 +353,14 @@ elif app_mode == "Smart Cleaning":
|
|
353 |
current_df[text_column] = current_df[text_column].astype(str).apply(lambda x: clean_text(x, cleaning_operation, chars_to_remove))
|
354 |
|
355 |
elif clean_action == "Remove Columns":
|
356 |
-
|
|
|
357 |
|
358 |
st.session_state.cleaned_data = current_df
|
359 |
st.success("Transformation applied!")
|
360 |
|
361 |
if st.button("Refresh Data Preview"): # Button to refresh data preview
|
362 |
-
st.
|
363 |
|
364 |
elif app_mode == "Advanced EDA":
|
365 |
st.title("🔍 Advanced Exploratory Analysis")
|
@@ -611,11 +612,10 @@ elif app_mode == "Model Training":
|
|
611 |
min_features = 1 # Ensure at least one feature is used
|
612 |
max_features = len(feature_columns) if len(feature_columns) > 0 else 1 # Use 1 if no features are selected
|
613 |
param_grid = {
|
614 |
-
'n_estimators': st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the forest."),
|
615 |
-
'max_depth': st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree."),
|
616 |
-
'min_samples_split': st.slider("Minimum Samples Split", 2, 10, 2, help="Minimum samples required to split an internal node"), #New hyperparameter
|
617 |
-
'min_samples_leaf': st.slider("Minimum Samples Leaf", 1, 10, 1, help="Minimum samples required to be at a leaf node"), #New hyperparameter
|
618 |
-
|
619 |
}
|
620 |
|
621 |
# Train-Test Split
|
@@ -699,6 +699,7 @@ elif app_mode == "Model Training":
|
|
699 |
model.fit(X_train_selected, y_train) # fit without gridsearch if param_grid is not defined
|
700 |
|
701 |
elif model_name == "Gradient Boosting":
|
|
|
702 |
model = GradientBoostingRegressor() if problem_type == "Regression" else GradientBoostingClassifier()
|
703 |
elif model_name == "SVM":
|
704 |
model = SVR() if problem_type == "Regression" else SVC()
|
@@ -761,6 +762,76 @@ elif app_mode == "Model Training":
|
|
761 |
else:
|
762 |
accuracy = accuracy_score(y_test, y_pred)
|
763 |
st.write(f"Accuracy: {accuracy:.4f}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
764 |
|
765 |
elif app_mode == "Predictions":
|
766 |
st.title("🔮 Make Predictions")
|
|
|
353 |
current_df[text_column] = current_df[text_column].astype(str).apply(lambda x: clean_text(x, cleaning_operation, chars_to_remove))
|
354 |
|
355 |
elif clean_action == "Remove Columns":
|
356 |
+
if remove_cols: #Check that it is not empty
|
357 |
+
current_df = current_df.drop(columns=remove_cols) # Drop selected columns
|
358 |
|
359 |
st.session_state.cleaned_data = current_df
|
360 |
st.success("Transformation applied!")
|
361 |
|
362 |
if st.button("Refresh Data Preview"): # Button to refresh data preview
|
363 |
+
st.rerun()
|
364 |
|
365 |
elif app_mode == "Advanced EDA":
|
366 |
st.title("🔍 Advanced Exploratory Analysis")
|
|
|
612 |
min_features = 1 # Ensure at least one feature is used
|
613 |
max_features = len(feature_columns) if len(feature_columns) > 0 else 1 # Use 1 if no features are selected
|
614 |
# Hyperparameter grid built from the sidebar/page sliders.
# Each slider is rendered exactly once: creating two sliders with the same
# label and arguments (and no explicit `key`) makes Streamlit raise a
# duplicate-widget error. The chosen value is wrapped in a one-element list,
# which is what `list(range(v, v + 1))` evaluated to.
param_grid = {
    'n_estimators': [st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the forest.")],
    'max_depth': [st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree.")],
    'min_samples_split': [st.slider("Minimum Samples Split", 2, 10, 2, help="Minimum samples required to split an internal node")],  # New hyperparameter
    'min_samples_leaf': [st.slider("Minimum Samples Leaf", 1, 10, 1, help="Minimum samples required to be at a leaf node")],  # New hyperparameter
}
|
620 |
|
621 |
# Train-Test Split
|
|
|
699 |
model.fit(X_train_selected, y_train) # fit without gridsearch if param_grid is not defined
|
700 |
|
701 |
elif model_name == "Gradient Boosting":
|
702 |
+
from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier #moved import here to avoid bloat
|
703 |
model = GradientBoostingRegressor() if problem_type == "Regression" else GradientBoostingClassifier()
|
704 |
elif model_name == "SVM":
|
705 |
model = SVR() if problem_type == "Regression" else SVC()
|
|
|
762 |
else:
|
763 |
accuracy = accuracy_score(y_test, y_pred)
|
764 |
st.write(f"Accuracy: {accuracy:.4f}")
|
765 |
+
|
766 |
+
elif app_mode == "Predictions":
|
767 |
+
st.title("🔮 Make Predictions")
|
768 |
+
|
769 |
+
if st.session_state.model is not None and st.session_state.cleaned_data is not None:
|
770 |
+
df = st.session_state.cleaned_data.copy()
|
771 |
+
|
772 |
+
# Input data for prediction
|
773 |
+
st.subheader("Enter Data for Prediction")
|
774 |
+
input_data = {}
|
775 |
+
|
776 |
+
try:
|
777 |
+
numeric_transformer_columns = st.session_state.model.steps[0][1].transformers_[0][2] if hasattr(st.session_state.model.steps[0][1].transformers_[0][2], '__len__') else []
|
778 |
+
categorical_transformer_columns = st.session_state.model.steps[0][1].transformers_[1][2] if hasattr(st.session_state.model.steps[0][1].transformers_[1][2], '__len__') else []
|
779 |
+
model_columns = numeric_transformer_columns + categorical_transformer_columns
|
780 |
+
except AttributeError as e:
|
781 |
+
st.error(f"Error accessing model transformers: {e}. Please ensure a valid model is trained and loaded.")
|
782 |
+
st.stop()
|
783 |
+
|
784 |
+
if not set(model_columns).issubset(set(df.columns)): #Fixed comparison
|
785 |
+
st.error("The model was trained on a dataframe that contains different columns than the currently uploaded dataframe. Please upload the correct dataframe.")
|
786 |
+
st.stop()
|
787 |
+
|
788 |
+
for col in model_columns:
|
789 |
+
if pd.api.types.is_numeric_dtype(df[col]):
|
790 |
+
input_data[col] = st.number_input(f"Enter {col}", value=df[col].mean())
|
791 |
+
else:
|
792 |
+
input_data[col] = st.selectbox(f"Select {col}", df[col].unique())
|
793 |
+
|
794 |
+
# Prediction Button
|
795 |
+
if st.button("Make Prediction"):
|
796 |
+
try:
|
797 |
+
input_df = pd.DataFrame([input_data])
|
798 |
+
prediction = st.session_state.model.predict(input_df)[0]
|
799 |
+
st.subheader("Prediction Result")
|
800 |
+
st.write(f"The predicted value is: {prediction}")
|
801 |
+
|
802 |
+
# Additional Feedback (Example for Classification)
|
803 |
+
if isinstance(st.session_state.model.steps[-1][1], LogisticRegression):
|
804 |
+
probabilities = st.session_state.model.predict_proba(input_df)[0]
|
805 |
+
st.write("Predicted Probabilities:")
|
806 |
+
st.write(probabilities)
|
807 |
+
|
808 |
+
except Exception as e:
|
809 |
+
st.error(f"An error occurred during prediction: {e}")
|
810 |
+
|
811 |
+
#Add batch prediction section in prediction tab
|
812 |
+
st.subheader("Batch Predictions")
|
813 |
+
batch_file = st.file_uploader("Upload CSV for Batch Predictions", type=["csv"])
|
814 |
+
if batch_file is not None:
|
815 |
+
try:
|
816 |
+
batch_df = pd.read_csv(batch_file)
|
817 |
+
# Preprocess the batch data
|
818 |
+
batch_processed = st.session_state.preprocessor.transform(batch_df)
|
819 |
+
# Make predictions
|
820 |
+
batch_predictions = st.session_state.model.predict(batch_processed)
|
821 |
+
batch_df['Prediction'] = batch_predictions
|
822 |
+
st.dataframe(batch_df)
|
823 |
+
|
824 |
+
# Download predictions
|
825 |
+
csv = batch_df.to_csv(index=False)
|
826 |
+
b64 = base64.b64encode(csv.encode()).decode() # some strings
|
827 |
+
href = f'<a href="data:file/csv;base64,{b64}" download="predictions.csv">Download Predictions CSV</a>'
|
828 |
+
st.markdown(href, unsafe_allow_html=True)
|
829 |
+
|
830 |
+
except Exception as e:
|
831 |
+
st.error(f"Error processing batch file: {e}")
|
832 |
+
|
833 |
+
else:
|
834 |
+
st.write("Please train a model first in the 'Model Training' section.")
|
835 |
|
836 |
elif app_mode == "Predictions":
|
837 |
st.title("🔮 Make Predictions")
|