Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -726,122 +726,108 @@ elif app_mode == "Advanced EDA":
|
|
726 |
st.error(f"An error occurred while generating the plot: {e}")
|
727 |
|
728 |
elif app_mode == "Model Training":
|
729 |
-
st.title("
|
730 |
|
731 |
if st.session_state.cleaned_data is not None:
|
732 |
df = st.session_state.cleaned_data.copy()
|
733 |
|
734 |
# Target Variable Selection
|
735 |
-
target_column = st.selectbox("Select Target Variable", df.columns, help="Choose the column
|
736 |
|
737 |
# Problem Type Selection
|
738 |
-
problem_type = st.radio("Select Problem Type", ["Regression", "Classification"], help="Choose the type of
|
739 |
|
740 |
-
# Feature Selection
|
741 |
-
|
742 |
-
if use_all_features:
|
743 |
-
feature_columns = df.drop(columns=[target_column]).columns.tolist()
|
744 |
-
else:
|
745 |
-
feature_columns = st.multiselect("Select Feature Columns", df.drop(columns=[target_column]).columns, help="Choose the features you want to use for prediction.")
|
746 |
|
747 |
# Model Selection
|
748 |
-
|
749 |
-
"Linear Regression", "
|
750 |
-
|
|
|
751 |
|
752 |
-
# Hyperparameter Tuning
|
753 |
-
|
754 |
-
|
755 |
-
|
756 |
-
elif model_type in ["Random Forest", "Gradient Boosting", "Random Forest Classifier"]:
|
757 |
-
n_estimators = st.slider("Number of Estimators", 50, 200, 100, help="Number of trees in the forest.")
|
758 |
-
elif model_type == "Support Vector Machine":
|
759 |
-
C = st.slider("C", 0.1, 10.0, 1.0, help="Regularization parameter.")
|
760 |
|
761 |
# Train-Test Split
|
762 |
-
test_size = st.slider("Test Size", 0.1, 0.5, 0.2, help="Proportion of the
|
763 |
|
764 |
-
# Model Training Button
|
765 |
if st.button("Train Model"):
|
766 |
with st.spinner("Training model..."):
|
767 |
-
|
768 |
-
|
769 |
-
|
770 |
-
|
771 |
-
|
772 |
-
|
773 |
-
|
774 |
-
|
775 |
-
|
776 |
-
|
777 |
-
|
778 |
-
|
779 |
-
('onehot', OneHotEncoder(handle_unknown='ignore'))
|
780 |
-
])
|
781 |
-
|
782 |
-
numeric_features = X_train.select_dtypes(include=['int64', 'float64']).columns
|
783 |
-
categorical_features = X_train.select_dtypes(include=['object']).columns
|
784 |
-
|
785 |
-
preprocessor = ColumnTransformer(
|
786 |
-
transformers=[
|
787 |
-
('num', numeric_transformer, numeric_features),
|
788 |
-
('cat', categorical_transformer, categorical_features)
|
789 |
])
|
790 |
|
791 |
-
|
792 |
-
|
793 |
-
|
794 |
-
|
795 |
-
|
796 |
-
|
797 |
-
|
798 |
-
|
799 |
-
|
800 |
-
|
801 |
-
|
802 |
-
|
803 |
-
|
804 |
-
|
805 |
-
|
806 |
-
|
807 |
-
|
808 |
-
|
809 |
-
|
810 |
-
|
811 |
-
|
812 |
-
|
813 |
-
|
814 |
-
|
815 |
-
|
816 |
-
|
817 |
-
|
818 |
-
|
819 |
-
|
820 |
-
|
821 |
-
|
822 |
-
|
823 |
-
|
824 |
-
|
825 |
-
|
826 |
-
|
827 |
-
|
828 |
-
|
829 |
-
st.
|
830 |
-
|
831 |
-
|
832 |
-
|
833 |
-
|
834 |
-
|
835 |
-
|
836 |
-
|
837 |
-
|
838 |
-
|
839 |
-
|
840 |
-
|
841 |
-
st.write(f"Recall: {recall:.4f}")
|
842 |
-
st.write(f"F1 Score: {f1:.4f}")
|
843 |
-
st.write(f"ROC AUC: {roc_auc:.4f}")
|
844 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
845 |
elif app_mode == "Predictions":
|
846 |
st.title("🔮 Make Predictions")
|
847 |
|
@@ -869,6 +855,13 @@ elif app_mode == "Predictions":
|
|
869 |
prediction = st.session_state.model.predict(input_df)[0]
|
870 |
st.subheader("Prediction Result")
|
871 |
st.write(f"The predicted value is: {prediction}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
872 |
except Exception as e:
|
873 |
st.error(f"An error occurred during prediction: {e}")
|
874 |
else:
|
@@ -1120,15 +1113,15 @@ elif app_mode == "Neural Network Studio":
|
|
1120 |
st.write(f"R-squared: {r2:.4f}")
|
1121 |
else:
|
1122 |
accuracy = accuracy_score(y_test, y_pred)
|
1123 |
-
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
|
1124 |
-
recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
|
1125 |
-
f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
|
1126 |
st.write(f"Accuracy: {accuracy:.4f}")
|
1127 |
st.write(f"Precision: {precision:.4f}")
|
1128 |
st.write(f"Recall: {recall:.4f}")
|
1129 |
st.write(f"F1 Score: {f1:.4f}")
|
1130 |
st.write("Classification Report:")
|
1131 |
-
st.text(classification_report(y_test, y_pred))
|
1132 |
|
1133 |
st.success("Model trained successfully!")
|
1134 |
|
|
|
726 |
st.error(f"An error occurred while generating the plot: {e}")
|
727 |
|
728 |
elif app_mode == "Model Training":
|
729 |
+
st.title("🚂 Model Training")
|
730 |
|
731 |
if st.session_state.cleaned_data is not None:
|
732 |
df = st.session_state.cleaned_data.copy()
|
733 |
|
734 |
# Target Variable Selection
|
735 |
+
target_column = st.selectbox("Select Target Variable", df.columns, help="Choose the column to predict.")
|
736 |
|
737 |
# Problem Type Selection
|
738 |
+
problem_type = st.radio("Select Problem Type", ["Regression", "Classification"], help="Choose the type of problem.")
|
739 |
|
740 |
+
# Feature Selection
|
741 |
+
feature_columns = st.multiselect("Select Feature Columns", df.drop(columns=[target_column]).columns, help="Choose features for training.")
|
|
|
|
|
|
|
|
|
742 |
|
743 |
# Model Selection
|
744 |
+
model_name = st.selectbox("Select Model", [
|
745 |
+
"Linear Regression", "Logistic Regression", "Decision Tree",
|
746 |
+
"Random Forest", "Gradient Boosting", "SVM"
|
747 |
+
], help="Choose a model.")
|
748 |
|
749 |
+
# Hyperparameter Tuning (Example - Add more as needed)
|
750 |
+
if model_name == "Random Forest":
|
751 |
+
n_estimators = st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the forest.")
|
752 |
+
max_depth = st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree.")
|
|
|
|
|
|
|
|
|
753 |
|
754 |
# Train-Test Split
|
755 |
+
test_size = st.slider("Test Size", 0.1, 0.5, 0.2, help="Proportion of the dataset to include in the test split.")
|
756 |
|
|
|
757 |
if st.button("Train Model"):
|
758 |
with st.spinner("Training model..."):
|
759 |
+
try:
|
760 |
+
X = df[feature_columns]
|
761 |
+
y = df[target_column]
|
762 |
+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
|
763 |
+
|
764 |
+
# Preprocessing Pipeline
|
765 |
+
numeric_features = X.select_dtypes(include=np.number).columns
|
766 |
+
categorical_features = X.select_dtypes(exclude=np.number).columns
|
767 |
+
|
768 |
+
numeric_transformer = Pipeline(steps=[
|
769 |
+
('imputer', SimpleImputer(strategy='median')),
|
770 |
+
('scaler', StandardScaler())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
771 |
])
|
772 |
|
773 |
+
categorical_transformer = Pipeline(steps=[
|
774 |
+
('imputer', SimpleImputer(strategy='most_frequent')),
|
775 |
+
('onehot', OneHotEncoder(handle_unknown='ignore'))
|
776 |
+
])
|
777 |
+
|
778 |
+
preprocessor = ColumnTransformer(
|
779 |
+
transformers=[
|
780 |
+
('num', numeric_transformer, numeric_features),
|
781 |
+
('cat', categorical_transformer, categorical_features)
|
782 |
+
])
|
783 |
+
|
784 |
+
X_train_processed = preprocessor.fit_transform(X_train)
|
785 |
+
X_test_processed = preprocessor.transform(X_test)
|
786 |
+
|
787 |
+
# Model Training
|
788 |
+
if model_name == "Linear Regression":
|
789 |
+
model = LinearRegression()
|
790 |
+
elif model_name == "Logistic Regression":
|
791 |
+
model = LogisticRegression(max_iter=1000)
|
792 |
+
elif model_name == "Decision Tree":
|
793 |
+
if problem_type == "Regression":
|
794 |
+
model = DecisionTreeRegressor()
|
795 |
+
else:
|
796 |
+
model = DecisionTreeClassifier()
|
797 |
+
elif model_name == "Random Forest":
|
798 |
+
if problem_type == "Regression":
|
799 |
+
model = RandomForestRegressor(n_estimators=n_estimators, max_depth=max_depth)
|
800 |
+
else:
|
801 |
+
model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth)
|
802 |
+
elif model_name == "Gradient Boosting":
|
803 |
+
model = GradientBoostingRegressor() if problem_type == "Regression" else GradientBoostingClassifier()
|
804 |
+
elif model_name == "SVM":
|
805 |
+
model = SVR() if problem_type == "Regression" else SVC()
|
806 |
+
|
807 |
+
model.fit(X_train_processed, y_train)
|
808 |
+
|
809 |
+
# Store model and preprocessor
|
810 |
+
st.session_state.model = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
|
811 |
+
st.session_state.preprocessor = preprocessor
|
812 |
+
|
813 |
+
# Model Evaluation
|
814 |
+
y_pred = model.predict(X_test_processed)
|
815 |
+
if problem_type == "Regression":
|
816 |
+
mse = mean_squared_error(y_test, y_pred)
|
817 |
+
r2 = r2_score(y_test, y_pred)
|
818 |
+
st.write(f"Mean Squared Error: {mse:.4f}")
|
819 |
+
st.write(f"R-squared: {r2:.4f}")
|
820 |
+
else:
|
821 |
+
accuracy = accuracy_score(y_test, y_pred)
|
822 |
+
st.write(f"Accuracy: {accuracy:.4f}")
|
|
|
|
|
|
|
823 |
|
824 |
+
st.success("Model trained successfully!")
|
825 |
+
|
826 |
+
except Exception as e:
|
827 |
+
st.error(f"An error occurred: {e}")
|
828 |
+
else:
|
829 |
+
st.write("Please upload and clean data first.")
|
830 |
+
|
831 |
elif app_mode == "Predictions":
|
832 |
st.title("🔮 Make Predictions")
|
833 |
|
|
|
855 |
prediction = st.session_state.model.predict(input_df)[0]
|
856 |
st.subheader("Prediction Result")
|
857 |
st.write(f"The predicted value is: {prediction}")
|
858 |
+
|
859 |
+
# Additional Feedback (Example for Classification)
|
860 |
+
if isinstance(st.session_state.model.steps[-1][1], LogisticRegression):
|
861 |
+
probabilities = st.session_state.model.predict_proba(input_df)[0]
|
862 |
+
st.write("Predicted Probabilities:")
|
863 |
+
st.write(probabilities)
|
864 |
+
|
865 |
except Exception as e:
|
866 |
st.error(f"An error occurred during prediction: {e}")
|
867 |
else:
|
|
|
1113 |
st.write(f"R-squared: {r2:.4f}")
|
1114 |
else:
|
1115 |
accuracy = accuracy_score(y_test, y_pred)
|
1116 |
+
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
|
1117 |
+
recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
|
1118 |
+
f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
|
1119 |
st.write(f"Accuracy: {accuracy:.4f}")
|
1120 |
st.write(f"Precision: {precision:.4f}")
|
1121 |
st.write(f"Recall: {recall:.4f}")
|
1122 |
st.write(f"F1 Score: {f1:.4f}")
|
1123 |
st.write("Classification Report:")
|
1124 |
+
st.text(classification_report(y_test, y_pred))
|
1125 |
|
1126 |
st.success("Model trained successfully!")
|
1127 |
|