CosmickVisions committed on
Commit
339e41b
·
verified ·
1 Parent(s): 531d1ee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -116
app.py CHANGED
@@ -670,13 +670,18 @@ elif app_mode == "Model Training":
670
  # Model Training and Hyperparameter Tuning
671
  if model_name == "Linear Regression":
672
  model = LinearRegression()
 
 
673
  elif model_name == "Logistic Regression":
674
  model = LogisticRegression(max_iter=1000)
 
675
  elif model_name == "Decision Tree":
676
  if problem_type == "Regression":
677
  model = DecisionTreeRegressor()
 
678
  else:
679
  model = DecisionTreeClassifier()
 
680
  elif model_name == "Random Forest":
681
  if problem_type == "Regression":
682
  model = RandomForestRegressor(random_state=42)
@@ -700,70 +705,29 @@ elif app_mode == "Model Training":
700
  elif model_name == "Gradient Boosting":
701
  from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier #moved import here to avoid bloat
702
  model = GradientBoostingRegressor() if problem_type == "Regression" else GradientBoostingClassifier()
 
703
  elif model_name == "SVM":
704
  model = SVR() if problem_type == "Regression" else SVC()
 
705
  elif model_name == "Naive Bayes":
706
  from sklearn.naive_bayes import GaussianNB
707
  model = GaussianNB()
 
708
  elif model_name == "KNN":
709
  from sklearn.neighbors import KNeighborsClassifier
710
  model = KNeighborsClassifier()
 
711
 
712
- # Cost-Sensitive Classification
713
- cost_matrix = None
714
- if problem_type == "Classification":
715
- cost_sensitive = st.checkbox("Enable Cost-Sensitive Classification") #new
716
-
717
- if cost_sensitive:
718
- #Get class labels
719
- classes = np.unique(y_train)
720
-
721
- #Create a matrix, with default cost being 1
722
- cost_matrix = np.ones((len(classes),len(classes)))
723
-
724
- #Cost of correct predictions are 0
725
- np.fill_diagonal(cost_matrix, 0)
726
-
727
- #Allow for individual weight specification
728
- st.write("Define misclassification costs:")
729
-
730
- for i in range(len(classes)):
731
- for j in range(len(classes)):
732
- if i != j:
733
- cost_matrix[i,j] = st.number_input(f"Cost of classifying {classes[i]} as {classes[j]}", value=1.0, min_value=0.0)
734
-
735
-
736
- #Threshold adjustment options, only shows up for log regression
737
-
738
- if model_name == "Logistic Regression" and problem_type == "Classification":
739
- threshold = st.slider("Select Threshold", 0.0, 1.0, 0.5, 0.01, help="Adjust the classification threshold")
740
- model = LogisticRegression(max_iter=1000)
741
-
742
- model.fit(X_train_selected, y_train) #Fit model
743
-
744
- #Adjust predictions according to threshold and make new variables
745
- y_pred_prob = model.predict_proba(X_test_selected)[:,1]
746
- y_pred = (y_pred_prob > threshold).astype(int)
747
-
748
-
749
- else:
750
- # Cross-validation
751
- cv_scores = cross_val_score(model, X_train_selected, y_train, cv=5, scoring='accuracy' if problem_type == "Classification" else 'neg_mean_squared_error') #example, adjust cv
752
- st.write(f"Cross-validation scores: {cv_scores}")
753
- st.write(f"Mean cross-validation score: {cv_scores.mean():.4f}")
754
-
755
- model.fit(X_train_selected, y_train)
756
-
757
- # Store model and preprocessor
758
- st.session_state.model = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
759
- st.session_state.preprocessor = preprocessor
760
-
761
- y_pred = model.predict(X_test_selected)
762
-
763
 
 
 
 
764
 
765
  # Model Evaluation
766
-
767
  if problem_type == "Regression":
768
  mse = mean_squared_error(y_test, y_pred)
769
  r2 = r2_score(y_test, y_pred)
@@ -799,10 +763,6 @@ elif app_mode == "Model Training":
799
  ax_conf.set_title('Confusion Matrix')
800
  st.pyplot(fig_conf)
801
 
802
- st.success("Model trained successfully!")
803
-
804
- except Exception as e:
805
- st.error(f"An error occurred: {e}")
806
  else:
807
  st.write("Please upload and clean data first.")
808
 
@@ -833,6 +793,11 @@ elif app_mode == "Model Training":
833
  st.write(f"Mean Squared Error: {mse:.4f}")
834
  st.write(f"R-squared: {r2:.4f}")
835
  else:
 
 
 
 
 
836
  accuracy = accuracy_score(y_test, y_pred)
837
  st.write(f"Accuracy: {accuracy:.4f}")
838
 
@@ -906,67 +871,7 @@ elif app_mode == "Predictions":
906
  else:
907
  st.write("Please train a model first in the 'Model Training' section.")
908
 
909
- elif app_mode == "Predictions":
910
- st.title("🔮 Make Predictions")
911
-
912
- if st.session_state.model is not None and st.session_state.cleaned_data is not None:
913
- df = st.session_state.cleaned_data.copy()
914
-
915
- # Input data for prediction
916
- st.subheader("Enter Data for Prediction")
917
- input_data = {}
918
- model_columns = st.session_state.model.steps[0][1].transformers_[0][2] + st.session_state.model.steps[0][1].transformers_[1][2]
919
- if not set(model_columns).issubset(set(df.drop(columns=[st.session_state.model.steps[-1][0]]).columns)):
920
- st.error("The model was trained on a dataframe that contains different columns than the currently uploaded dataframe. Please upload the correct dataframe.")
921
- st.stop()
922
-
923
- for col in model_columns:
924
- if pd.api.types.is_numeric_dtype(df[col]):
925
- input_data[col] = st.number_input(f"Enter {col}", value=df[col].mean())
926
- else:
927
- input_data[col] = st.selectbox(f"Select {col}", df[col].unique())
928
-
929
- # Prediction Button
930
- if st.button("Make Prediction"):
931
- try:
932
- input_df = pd.DataFrame([input_data])
933
- prediction = st.session_state.model.predict(input_df)[0]
934
- st.subheader("Prediction Result")
935
- st.write(f"The predicted value is: {prediction}")
936
-
937
- # Additional Feedback (Example for Classification)
938
- if isinstance(st.session_state.model.steps[-1][1], LogisticRegression):
939
- probabilities = st.session_state.model.predict_proba(input_df)[0]
940
- st.write("Predicted Probabilities:")
941
- st.write(probabilities)
942
-
943
- except Exception as e:
944
- st.error(f"An error occurred during prediction: {e}")
945
-
946
- #Add batch prediction section in prediction tab
947
- st.subheader("Batch Predictions")
948
- batch_file = st.file_uploader("Upload CSV for Batch Predictions", type=["csv"])
949
- if batch_file is not None:
950
- try:
951
- batch_df = pd.read_csv(batch_file)
952
- # Preprocess the batch data
953
- batch_processed = st.session_state.preprocessor.transform(batch_df)
954
- # Make predictions
955
- batch_predictions = st.session_state.model.predict(batch_processed)
956
- batch_df['Prediction'] = batch_predictions
957
- st.dataframe(batch_df)
958
 
959
- # Download predictions
960
- csv = batch_df.to_csv(index=False)
961
- b64 = base64.b64encode(csv.encode()).decode() # some strings
962
- href = f'<a href="data:file/csv;base64,{b64}" download="predictions.csv">Download Predictions CSV</a>'
963
- st.markdown(href, unsafe_allow_html=True)
964
-
965
- except Exception as e:
966
- st.error(f"Error processing batch file: {e}")
967
-
968
- else:
969
- st.write("Please train a model first in the 'Model Training' section.")
970
 
971
  elif app_mode == "Visualization Lab":
972
  st.title("🔬 Advanced Data Visualization and Clustering Lab")
 
670
  # Model Training and Hyperparameter Tuning
671
  if model_name == "Linear Regression":
672
  model = LinearRegression()
673
+ model.fit(X_train_selected, y_train)
674
+
675
  elif model_name == "Logistic Regression":
676
  model = LogisticRegression(max_iter=1000)
677
+ model.fit(X_train_selected, y_train)
678
  elif model_name == "Decision Tree":
679
  if problem_type == "Regression":
680
  model = DecisionTreeRegressor()
681
+ model.fit(X_train_selected, y_train)
682
  else:
683
  model = DecisionTreeClassifier()
684
+ model.fit(X_train_selected, y_train)
685
  elif model_name == "Random Forest":
686
  if problem_type == "Regression":
687
  model = RandomForestRegressor(random_state=42)
 
705
  elif model_name == "Gradient Boosting":
706
  from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier #moved import here to avoid bloat
707
  model = GradientBoostingRegressor() if problem_type == "Regression" else GradientBoostingClassifier()
708
+ model.fit(X_train_selected, y_train)
709
  elif model_name == "SVM":
710
  model = SVR() if problem_type == "Regression" else SVC()
711
+ model.fit(X_train_selected, y_train)
712
  elif model_name == "Naive Bayes":
713
  from sklearn.naive_bayes import GaussianNB
714
  model = GaussianNB()
715
+ model.fit(X_train_selected, y_train)
716
  elif model_name == "KNN":
717
  from sklearn.neighbors import KNeighborsClassifier
718
  model = KNeighborsClassifier()
719
+ model.fit(X_train_selected, y_train)
720
 
721
+ # Store model and preprocessor
722
+ st.session_state.model = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
723
+ st.session_state.preprocessor = preprocessor
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
724
 
725
+ # Store model and preprocessor
726
+ st.session_state.model = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
727
+ st.session_state.preprocessor = preprocessor
728
 
729
  # Model Evaluation
730
+ y_pred = model.predict(X_test_selected)
731
  if problem_type == "Regression":
732
  mse = mean_squared_error(y_test, y_pred)
733
  r2 = r2_score(y_test, y_pred)
 
763
  ax_conf.set_title('Confusion Matrix')
764
  st.pyplot(fig_conf)
765
 
 
 
 
 
766
  else:
767
  st.write("Please upload and clean data first.")
768
 
 
793
  st.write(f"Mean Squared Error: {mse:.4f}")
794
  st.write(f"R-squared: {r2:.4f}")
795
  else:
796
+ from sklearn.metrics import confusion_matrix, roc_curve, auc, precision_recall_curve, classification_report #Import here to avoid library bloat
797
+
798
+ #Weighted averaging for metrics for multiclass
799
+ average_method = "weighted" #changed from None
800
+
801
  accuracy = accuracy_score(y_test, y_pred)
802
  st.write(f"Accuracy: {accuracy:.4f}")
803
 
 
871
  else:
872
  st.write("Please train a model first in the 'Model Training' section.")
873
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
874
 
 
 
 
 
 
 
 
 
 
 
 
875
 
876
  elif app_mode == "Visualization Lab":
877
  st.title("🔬 Advanced Data Visualization and Clustering Lab")