CosmickVisions committed on
Commit
9e30598
·
verified ·
1 Parent(s): c7e14fb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -7
app.py CHANGED
@@ -353,13 +353,14 @@ elif app_mode == "Smart Cleaning":
353
  current_df[text_column] = current_df[text_column].astype(str).apply(lambda x: clean_text(x, cleaning_operation, chars_to_remove))
354
 
355
  elif clean_action == "Remove Columns":
356
- current_df = current_df.drop(columns=remove_cols) # Drop selected columns
 
357
 
358
  st.session_state.cleaned_data = current_df
359
  st.success("Transformation applied!")
360
 
361
  if st.button("Refresh Data Preview"): # Button to refresh data preview
362
- st.experimental_rerun()
363
 
364
  elif app_mode == "Advanced EDA":
365
  st.title("🔍 Advanced Exploratory Analysis")
@@ -611,11 +612,10 @@ elif app_mode == "Model Training":
611
  min_features = 1 # Ensure at least one feature is used
612
  max_features = len(feature_columns) if len(feature_columns) > 0 else 1 # Use 1 if no features are selected
613
  param_grid = {
614
- 'n_estimators': st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the forest."),
615
- 'max_depth': st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree."),
616
- 'min_samples_split': st.slider("Minimum Samples Split", 2, 10, 2, help="Minimum samples required to split an internal node"), #New hyperparameter
617
- 'min_samples_leaf': st.slider("Minimum Samples Leaf", 1, 10, 1, help="Minimum samples required to be at a leaf node"), #New hyperparameter
618
-
619
  }
620
 
621
  # Train-Test Split
@@ -699,6 +699,7 @@ elif app_mode == "Model Training":
699
  model.fit(X_train_selected, y_train) # fit without gridsearch if param_grid is not defined
700
 
701
  elif model_name == "Gradient Boosting":
 
702
  model = GradientBoostingRegressor() if problem_type == "Regression" else GradientBoostingClassifier()
703
  elif model_name == "SVM":
704
  model = SVR() if problem_type == "Regression" else SVC()
@@ -761,6 +762,76 @@ elif app_mode == "Model Training":
761
  else:
762
  accuracy = accuracy_score(y_test, y_pred)
763
  st.write(f"Accuracy: {accuracy:.4f}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
764
 
765
  elif app_mode == "Predictions":
766
  st.title("🔮 Make Predictions")
 
353
  current_df[text_column] = current_df[text_column].astype(str).apply(lambda x: clean_text(x, cleaning_operation, chars_to_remove))
354
 
355
  elif clean_action == "Remove Columns":
356
+ if remove_cols: #Check that it is not empty
357
+ current_df = current_df.drop(columns=remove_cols) # Drop selected columns
358
 
359
  st.session_state.cleaned_data = current_df
360
  st.success("Transformation applied!")
361
 
362
  if st.button("Refresh Data Preview"): # Button to refresh data preview
363
+ st.rerun()
364
 
365
  elif app_mode == "Advanced EDA":
366
  st.title("🔍 Advanced Exploratory Analysis")
 
612
  min_features = 1 # Ensure at least one feature is used
613
  max_features = len(feature_columns) if len(feature_columns) > 0 else 1 # Use 1 if no features are selected
614
  param_grid = {
615
+ 'n_estimators': list(range(st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the forest."),(st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the forest.")+1))),
616
+ 'max_depth': list(range(st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree."),(st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree.")+1))),
617
+ 'min_samples_split': list(range(st.slider("Minimum Samples Split", 2, 10, 2, help="Minimum samples required to split an internal node"),(st.slider("Minimum Samples Split", 2, 10, 2, help="Minimum samples required to split an internal node")+1))), #New hyperparameter
618
+ 'min_samples_leaf': list(range(st.slider("Minimum Samples Leaf", 1, 10, 1, help="Minimum samples required to be at a leaf node"),(st.slider("Minimum Samples Leaf", 1, 10, 1, help="Minimum samples required to be at a leaf node")+1))), #New hyperparameter
 
619
  }
620
 
621
  # Train-Test Split
 
699
  model.fit(X_train_selected, y_train) # fit without gridsearch if param_grid is not defined
700
 
701
  elif model_name == "Gradient Boosting":
702
+ from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier #moved import here to avoid bloat
703
  model = GradientBoostingRegressor() if problem_type == "Regression" else GradientBoostingClassifier()
704
  elif model_name == "SVM":
705
  model = SVR() if problem_type == "Regression" else SVC()
 
762
  else:
763
  accuracy = accuracy_score(y_test, y_pred)
764
  st.write(f"Accuracy: {accuracy:.4f}")
765
+
766
+ elif app_mode == "Predictions":
767
+ st.title("🔮 Make Predictions")
768
+
769
+ if st.session_state.model is not None and st.session_state.cleaned_data is not None:
770
+ df = st.session_state.cleaned_data.copy()
771
+
772
+ # Input data for prediction
773
+ st.subheader("Enter Data for Prediction")
774
+ input_data = {}
775
+
776
+ try:
777
+ numeric_transformer_columns = st.session_state.model.steps[0][1].transformers_[0][2] if hasattr(st.session_state.model.steps[0][1].transformers_[0][2], '__len__') else []
778
+ categorical_transformer_columns = st.session_state.model.steps[0][1].transformers_[1][2] if hasattr(st.session_state.model.steps[0][1].transformers_[1][2], '__len__') else []
779
+ model_columns = numeric_transformer_columns + categorical_transformer_columns
780
+ except AttributeError as e:
781
+ st.error(f"Error accessing model transformers: {e}. Please ensure a valid model is trained and loaded.")
782
+ st.stop()
783
+
784
+ if not set(model_columns).issubset(set(df.columns)): #Fixed comparison
785
+ st.error("The model was trained on a dataframe that contains different columns than the currently uploaded dataframe. Please upload the correct dataframe.")
786
+ st.stop()
787
+
788
+ for col in model_columns:
789
+ if pd.api.types.is_numeric_dtype(df[col]):
790
+ input_data[col] = st.number_input(f"Enter {col}", value=df[col].mean())
791
+ else:
792
+ input_data[col] = st.selectbox(f"Select {col}", df[col].unique())
793
+
794
+ # Prediction Button
795
+ if st.button("Make Prediction"):
796
+ try:
797
+ input_df = pd.DataFrame([input_data])
798
+ prediction = st.session_state.model.predict(input_df)[0]
799
+ st.subheader("Prediction Result")
800
+ st.write(f"The predicted value is: {prediction}")
801
+
802
+ # Additional Feedback (Example for Classification)
803
+ if isinstance(st.session_state.model.steps[-1][1], LogisticRegression):
804
+ probabilities = st.session_state.model.predict_proba(input_df)[0]
805
+ st.write("Predicted Probabilities:")
806
+ st.write(probabilities)
807
+
808
+ except Exception as e:
809
+ st.error(f"An error occurred during prediction: {e}")
810
+
811
+ #Add batch prediction section in prediction tab
812
+ st.subheader("Batch Predictions")
813
+ batch_file = st.file_uploader("Upload CSV for Batch Predictions", type=["csv"])
814
+ if batch_file is not None:
815
+ try:
816
+ batch_df = pd.read_csv(batch_file)
817
+ # Preprocess the batch data
818
+ batch_processed = st.session_state.preprocessor.transform(batch_df)
819
+ # Make predictions
820
+ batch_predictions = st.session_state.model.predict(batch_processed)
821
+ batch_df['Prediction'] = batch_predictions
822
+ st.dataframe(batch_df)
823
+
824
+ # Download predictions
825
+ csv = batch_df.to_csv(index=False)
826
+ b64 = base64.b64encode(csv.encode()).decode() # some strings
827
+ href = f'<a href="data:file/csv;base64,{b64}" download="predictions.csv">Download Predictions CSV</a>'
828
+ st.markdown(href, unsafe_allow_html=True)
829
+
830
+ except Exception as e:
831
+ st.error(f"Error processing batch file: {e}")
832
+
833
+ else:
834
+ st.write("Please train a model first in the 'Model Training' section.")
835
 
836
  elif app_mode == "Predictions":
837
  st.title("🔮 Make Predictions")