CosmickVisions committed on
Commit
c72ced1
·
verified ·
1 Parent(s): 9a91a20

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +94 -101
app.py CHANGED
@@ -726,122 +726,108 @@ elif app_mode == "Advanced EDA":
726
  st.error(f"An error occurred while generating the plot: {e}")
727
 
728
  elif app_mode == "Model Training":
729
- st.title("🧠 Model Training & Evaluation")
730
 
731
  if st.session_state.cleaned_data is not None:
732
  df = st.session_state.cleaned_data.copy()
733
 
734
  # Target Variable Selection
735
- target_column = st.selectbox("Select Target Variable", df.columns, help="Choose the column you want to predict.")
736
 
737
  # Problem Type Selection
738
- problem_type = st.radio("Select Problem Type", ["Regression", "Classification"], help="Choose the type of machine learning problem.")
739
 
740
- # Feature Selection (optional)
741
- use_all_features = st.checkbox("Use All Features", value=True, help="Select to use all features for training. Deselect to manually choose features.")
742
- if use_all_features:
743
- feature_columns = df.drop(columns=[target_column]).columns.tolist()
744
- else:
745
- feature_columns = st.multiselect("Select Feature Columns", df.drop(columns=[target_column]).columns, help="Choose the features you want to use for prediction.")
746
 
747
  # Model Selection
748
- model_type = st.selectbox("Select Model", [
749
- "Linear Regression", "Decision Tree", "Random Forest", "Gradient Boosting", "Support Vector Machine", "Logistic Regression", "Random Forest Classifier"
750
- ], help="Choose the machine learning model to use.")
 
751
 
752
- # Hyperparameter Tuning
753
- with st.expander("Hyperparameter Tuning", expanded=False):
754
- if model_type == "Decision Tree":
755
- max_depth = st.slider("Max Depth", 2, 30, 5, help="Maximum depth of the decision tree.")
756
- elif model_type in ["Random Forest", "Gradient Boosting", "Random Forest Classifier"]:
757
- n_estimators = st.slider("Number of Estimators", 50, 200, 100, help="Number of trees in the forest.")
758
- elif model_type == "Support Vector Machine":
759
- C = st.slider("C", 0.1, 10.0, 1.0, help="Regularization parameter.")
760
 
761
  # Train-Test Split
762
- test_size = st.slider("Test Size", 0.1, 0.5, 0.2, help="Proportion of the data to use for testing.")
763
 
764
- # Model Training Button
765
  if st.button("Train Model"):
766
  with st.spinner("Training model..."):
767
- # Split data
768
- X = df[feature_columns]
769
- y = df[target_column]
770
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
771
-
772
- # Preprocessing
773
- numeric_transformer = Pipeline(steps=[
774
- ('imputer', SimpleImputer(strategy='median')),
775
- ('scaler', StandardScaler())
776
- ])
777
- categorical_transformer = Pipeline(steps=[
778
- ('imputer', SimpleImputer(strategy='most_frequent')),
779
- ('onehot', OneHotEncoder(handle_unknown='ignore'))
780
- ])
781
-
782
- numeric_features = X_train.select_dtypes(include=['int64', 'float64']).columns
783
- categorical_features = X_train.select_dtypes(include=['object']).columns
784
-
785
- preprocessor = ColumnTransformer(
786
- transformers=[
787
- ('num', numeric_transformer, numeric_features),
788
- ('cat', categorical_transformer, categorical_features)
789
  ])
790
 
791
- # Model Selection and Training
792
- if model_type == "Linear Regression":
793
- model = Pipeline(steps=[('preprocessor', preprocessor),
794
- ('regressor', LinearRegression())])
795
- elif model_type == "Decision Tree":
796
- model = Pipeline(steps=[('preprocessor', preprocessor),
797
- ('regressor', DecisionTreeRegressor(max_depth=max_depth, random_state=42))])
798
- elif model_type == "Random Forest":
799
- model = Pipeline(steps=[('preprocessor', preprocessor),
800
- ('regressor', RandomForestRegressor(n_estimators=n_estimators, random_state=42))])
801
- elif model_type == "Gradient Boosting":
802
- model = Pipeline(steps=[('preprocessor', preprocessor),
803
- ('regressor', GradientBoostingRegressor(n_estimators=n_estimators, random_state=42))])
804
- elif model_type == "Support Vector Machine":
805
- model = Pipeline(steps=[('preprocessor', preprocessor),
806
- ('regressor', SVR(C=C))])
807
- elif model_type == "Logistic Regression":
808
- model = Pipeline(steps=[('preprocessor', preprocessor),
809
- ('classifier', LogisticRegression(random_state=42))])
810
- elif model_type == "Random Forest Classifier":
811
- model = Pipeline(steps=[('preprocessor', preprocessor),
812
- ('classifier', RandomForestClassifier(n_estimators=n_estimators, random_state=42))])
813
-
814
- model.fit(X_train, y_train)
815
-
816
- # Store the trained model and preprocessor in session state
817
- st.session_state.model = model
818
- st.session_state.preprocessor = preprocessor
819
-
820
- # Make predictions
821
- y_pred = model.predict(X_test)
822
-
823
- # Evaluation
824
- if problem_type == "Regression":
825
- mse = mean_squared_error(y_test, y_pred)
826
- rmse = np.sqrt(mse)
827
- mae = mean_absolute_error(y_test, y_pred)
828
- r2 = r2_score(y_test, y_pred)
829
- st.write(f"Mean Squared Error: {mse:.4f}")
830
- st.write(f"Root Mean Squared Error: {rmse:.4f}")
831
- st.write(f"Mean Absolute Error: {mae:.4f}")
832
- st.write(f"R-squared: {r2:.4f}")
833
- else:
834
- accuracy = accuracy_score(y_test, y_pred)
835
- precision = precision_score(y_test,y_pred)
836
- recall = recall_score(y_test, y_pred)
837
- f1 = f1_score(y_test, y_pred)
838
- roc_auc = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])
839
- st.write(f"Accuracy: {accuracy:.4f}")
840
- st.write(f"Precision: {precision:.4f}")
841
- st.write(f"Recall: {recall:.4f}")
842
- st.write(f"F1 Score: {f1:.4f}")
843
- st.write(f"ROC AUC: {roc_auc:.4f}")
844
 
 
 
 
 
 
 
 
845
  elif app_mode == "Predictions":
846
  st.title("🔮 Make Predictions")
847
 
@@ -869,6 +855,13 @@ elif app_mode == "Predictions":
869
  prediction = st.session_state.model.predict(input_df)[0]
870
  st.subheader("Prediction Result")
871
  st.write(f"The predicted value is: {prediction}")
 
 
 
 
 
 
 
872
  except Exception as e:
873
  st.error(f"An error occurred during prediction: {e}")
874
  else:
@@ -1120,15 +1113,15 @@ elif app_mode == "Neural Network Studio":
1120
  st.write(f"R-squared: {r2:.4f}")
1121
  else:
1122
  accuracy = accuracy_score(y_test, y_pred)
1123
- precision = precision_score(y_test, y_pred, average='weighted', zero_division=0) #Added zero_division
1124
- recall = recall_score(y_test, y_pred, average='weighted', zero_division=0) #Added zero_division
1125
- f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0) #Added zero_division
1126
  st.write(f"Accuracy: {accuracy:.4f}")
1127
  st.write(f"Precision: {precision:.4f}")
1128
  st.write(f"Recall: {recall:.4f}")
1129
  st.write(f"F1 Score: {f1:.4f}")
1130
  st.write("Classification Report:")
1131
- st.text(classification_report(y_test, y_pred)) #added classification report
1132
 
1133
  st.success("Model trained successfully!")
1134
 
 
726
  st.error(f"An error occurred while generating the plot: {e}")
727
 
728
  elif app_mode == "Model Training":
729
+ st.title("🚂 Model Training")
730
 
731
  if st.session_state.cleaned_data is not None:
732
  df = st.session_state.cleaned_data.copy()
733
 
734
  # Target Variable Selection
735
+ target_column = st.selectbox("Select Target Variable", df.columns, help="Choose the column to predict.")
736
 
737
  # Problem Type Selection
738
+ problem_type = st.radio("Select Problem Type", ["Regression", "Classification"], help="Choose the type of problem.")
739
 
740
+ # Feature Selection
741
+ feature_columns = st.multiselect("Select Feature Columns", df.drop(columns=[target_column]).columns, help="Choose features for training.")
 
 
 
 
742
 
743
  # Model Selection
744
+ model_name = st.selectbox("Select Model", [
745
+ "Linear Regression", "Logistic Regression", "Decision Tree",
746
+ "Random Forest", "Gradient Boosting", "SVM"
747
+ ], help="Choose a model.")
748
 
749
+ # Hyperparameter Tuning (Example - Add more as needed)
750
+ if model_name == "Random Forest":
751
+ n_estimators = st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the forest.")
752
+ max_depth = st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree.")
 
 
 
 
753
 
754
  # Train-Test Split
755
+ test_size = st.slider("Test Size", 0.1, 0.5, 0.2, help="Proportion of the dataset to include in the test split.")
756
 
 
757
  if st.button("Train Model"):
758
  with st.spinner("Training model..."):
759
+ try:
760
+ X = df[feature_columns]
761
+ y = df[target_column]
762
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
763
+
764
+ # Preprocessing Pipeline
765
+ numeric_features = X.select_dtypes(include=np.number).columns
766
+ categorical_features = X.select_dtypes(exclude=np.number).columns
767
+
768
+ numeric_transformer = Pipeline(steps=[
769
+ ('imputer', SimpleImputer(strategy='median')),
770
+ ('scaler', StandardScaler())
 
 
 
 
 
 
 
 
 
 
771
  ])
772
 
773
+ categorical_transformer = Pipeline(steps=[
774
+ ('imputer', SimpleImputer(strategy='most_frequent')),
775
+ ('onehot', OneHotEncoder(handle_unknown='ignore'))
776
+ ])
777
+
778
+ preprocessor = ColumnTransformer(
779
+ transformers=[
780
+ ('num', numeric_transformer, numeric_features),
781
+ ('cat', categorical_transformer, categorical_features)
782
+ ])
783
+
784
+ X_train_processed = preprocessor.fit_transform(X_train)
785
+ X_test_processed = preprocessor.transform(X_test)
786
+
787
+ # Model Training
788
+ if model_name == "Linear Regression":
789
+ model = LinearRegression()
790
+ elif model_name == "Logistic Regression":
791
+ model = LogisticRegression(max_iter=1000)
792
+ elif model_name == "Decision Tree":
793
+ if problem_type == "Regression":
794
+ model = DecisionTreeRegressor()
795
+ else:
796
+ model = DecisionTreeClassifier()
797
+ elif model_name == "Random Forest":
798
+ if problem_type == "Regression":
799
+ model = RandomForestRegressor(n_estimators=n_estimators, max_depth=max_depth)
800
+ else:
801
+ model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth)
802
+ elif model_name == "Gradient Boosting":
803
+ model = GradientBoostingRegressor() if problem_type == "Regression" else GradientBoostingClassifier()
804
+ elif model_name == "SVM":
805
+ model = SVR() if problem_type == "Regression" else SVC()
806
+
807
+ model.fit(X_train_processed, y_train)
808
+
809
+ # Store model and preprocessor
810
+ st.session_state.model = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
811
+ st.session_state.preprocessor = preprocessor
812
+
813
+ # Model Evaluation
814
+ y_pred = model.predict(X_test_processed)
815
+ if problem_type == "Regression":
816
+ mse = mean_squared_error(y_test, y_pred)
817
+ r2 = r2_score(y_test, y_pred)
818
+ st.write(f"Mean Squared Error: {mse:.4f}")
819
+ st.write(f"R-squared: {r2:.4f}")
820
+ else:
821
+ accuracy = accuracy_score(y_test, y_pred)
822
+ st.write(f"Accuracy: {accuracy:.4f}")
 
 
 
823
 
824
+ st.success("Model trained successfully!")
825
+
826
+ except Exception as e:
827
+ st.error(f"An error occurred: {e}")
828
+ else:
829
+ st.write("Please upload and clean data first.")
830
+
831
  elif app_mode == "Predictions":
832
  st.title("🔮 Make Predictions")
833
 
 
855
  prediction = st.session_state.model.predict(input_df)[0]
856
  st.subheader("Prediction Result")
857
  st.write(f"The predicted value is: {prediction}")
858
+
859
+ # Additional Feedback (Example for Classification)
860
+ if isinstance(st.session_state.model.steps[-1][1], LogisticRegression):
861
+ probabilities = st.session_state.model.predict_proba(input_df)[0]
862
+ st.write("Predicted Probabilities:")
863
+ st.write(probabilities)
864
+
865
  except Exception as e:
866
  st.error(f"An error occurred during prediction: {e}")
867
  else:
 
1113
  st.write(f"R-squared: {r2:.4f}")
1114
  else:
1115
  accuracy = accuracy_score(y_test, y_pred)
1116
+ precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
1117
+ recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
1118
+ f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
1119
  st.write(f"Accuracy: {accuracy:.4f}")
1120
  st.write(f"Precision: {precision:.4f}")
1121
  st.write(f"Recall: {recall:.4f}")
1122
  st.write(f"F1 Score: {f1:.4f}")
1123
  st.write("Classification Report:")
1124
+ st.text(classification_report(y_test, y_pred))
1125
 
1126
  st.success("Model trained successfully!")
1127