CosmickVisions committed on
Commit
c7e14fb
·
verified ·
1 Parent(s): d0457f4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -21
app.py CHANGED
@@ -273,6 +273,7 @@ elif app_mode == "Smart Cleaning":
273
  clean_action = st.selectbox("Choose Operation", [
274
  "Handle Missing Values",
275
  "Clean Text",
 
276
  # ... other cleaning operations ...
277
  ])
278
 
@@ -298,7 +299,13 @@ elif app_mode == "Smart Cleaning":
298
  if cleaning_operation == "Remove Special Characters":
299
  chars_to_remove = st.text_input("Characters to Remove", r'[^a-zA-Z0-9\s]')
300
 
 
 
 
301
  with col2:
 
 
 
302
  if st.button("Apply Transformation"):
303
  with st.spinner("Applying changes..."):
304
  current_df = df.copy()
@@ -330,7 +337,7 @@ elif app_mode == "Smart Cleaning":
330
  current_df = current_df.dropna()
331
 
332
  elif clean_action == "Clean Text":
333
- import re #moved here since its only used here to avoid library bloat
334
 
335
  def clean_text(text, operation, chars_to_remove=r'[^a-zA-Z0-9\s]'):
336
  if operation == "Remove Special Characters":
@@ -345,9 +352,15 @@ elif app_mode == "Smart Cleaning":
345
 
346
  current_df[text_column] = current_df[text_column].astype(str).apply(lambda x: clean_text(x, cleaning_operation, chars_to_remove))
347
 
 
 
 
348
  st.session_state.cleaned_data = current_df
349
  st.success("Transformation applied!")
350
-
 
 
 
351
  elif app_mode == "Advanced EDA":
352
  st.title("🔍 Advanced Exploratory Analysis")
353
 
@@ -594,12 +607,15 @@ elif app_mode == "Model Training":
594
 
595
  feature_selection_method = st.selectbox("Feature Selection Method", ["None", "SelectKBest"])
596
 
597
- if model_name == "Random Forest":
 
 
598
  param_grid = {
599
  'n_estimators': st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the forest."),
600
  'max_depth': st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree."),
601
  'min_samples_split': st.slider("Minimum Samples Split", 2, 10, 2, help="Minimum samples required to split an internal node"), #New hyperparameter
602
  'min_samples_leaf': st.slider("Minimum Samples Leaf", 1, 10, 1, help="Minimum samples required to be at a leaf node"), #New hyperparameter
 
603
  }
604
 
605
  # Train-Test Split
@@ -610,6 +626,12 @@ elif app_mode == "Model Training":
610
  try:
611
  X = df[feature_columns]
612
  y = df[target_column]
 
 
 
 
 
 
613
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
614
 
615
  # Preprocessing Pipeline
@@ -658,16 +680,23 @@ elif app_mode == "Model Training":
658
  elif model_name == "Random Forest":
659
  if problem_type == "Regression":
660
  model = RandomForestRegressor(random_state=42)
661
- grid_search = GridSearchCV(model, param_grid, cv=3, scoring='neg_mean_squared_error') # Example scoring
662
- grid_search.fit(X_train_selected, y_train)
663
- model = grid_search.best_estimator_
664
- st.write("Best Parameters:", grid_search.best_params_)
 
 
 
 
665
  else:
666
  model = RandomForestClassifier(random_state=42)
667
- grid_search = GridSearchCV(model, param_grid, cv=3, scoring='accuracy')
668
- grid_search.fit(X_train_selected, y_train)
669
- model = grid_search.best_estimator_
670
- st.write("Best Parameters:", grid_search.best_params_)
 
 
 
671
 
672
  elif model_name == "Gradient Boosting":
673
  model = GradientBoostingRegressor() if problem_type == "Regression" else GradientBoostingClassifier()
@@ -721,16 +750,17 @@ elif app_mode == "Model Training":
721
  st.error(f"Error loading model: {e}")
722
 
723
  #Model Evaluation Section
724
- y_pred = st.session_state.model.predict(X_test)
725
-
726
- if problem_type == "Regression":
727
- mse = mean_squared_error(y_test, y_pred)
728
- r2 = r2_score(y_test, y_pred)
729
- st.write(f"Mean Squared Error: {mse:.4f}")
730
- st.write(f"R-squared: {r2:.4f}")
731
- else:
732
- accuracy = accuracy_score(y_test, y_pred)
733
- st.write(f"Accuracy: {accuracy:.4f}")
 
734
 
735
  elif app_mode == "Predictions":
736
  st.title("🔮 Make Predictions")
 
273
  clean_action = st.selectbox("Choose Operation", [
274
  "Handle Missing Values",
275
  "Clean Text",
276
+ "Remove Columns", # New option
277
  # ... other cleaning operations ...
278
  ])
279
 
 
299
  if cleaning_operation == "Remove Special Characters":
300
  chars_to_remove = st.text_input("Characters to Remove", r'[^a-zA-Z0-9\s]')
301
 
302
+ elif clean_action == "Remove Columns":
303
+ remove_cols = st.multiselect("Columns to Remove", df.columns) # Multiselect for column removal
304
+
305
  with col2:
306
+ st.subheader("Data Preview") # Added Data Preview Section
307
+ st.dataframe(df.head(10), use_container_width=True) # Display sample data
308
+
309
  if st.button("Apply Transformation"):
310
  with st.spinner("Applying changes..."):
311
  current_df = df.copy()
 
337
  current_df = current_df.dropna()
338
 
339
  elif clean_action == "Clean Text":
340
+ import re # moved here since its only used here to avoid library bloat
341
 
342
  def clean_text(text, operation, chars_to_remove=r'[^a-zA-Z0-9\s]'):
343
  if operation == "Remove Special Characters":
 
352
 
353
  current_df[text_column] = current_df[text_column].astype(str).apply(lambda x: clean_text(x, cleaning_operation, chars_to_remove))
354
 
355
+ elif clean_action == "Remove Columns":
356
+ current_df = current_df.drop(columns=remove_cols) # Drop selected columns
357
+
358
  st.session_state.cleaned_data = current_df
359
  st.success("Transformation applied!")
360
+
361
+ if st.button("Refresh Data Preview"): # Button to refresh data preview
362
+ st.experimental_rerun()
363
+
364
  elif app_mode == "Advanced EDA":
365
  st.title("🔍 Advanced Exploratory Analysis")
366
 
 
607
 
608
  feature_selection_method = st.selectbox("Feature Selection Method", ["None", "SelectKBest"])
609
 
610
+ if model_name == "Random Forest" and feature_columns: # Check if Random Forest and features are selected
611
+ min_features = 1 # Ensure at least one feature is used
612
+ max_features = len(feature_columns) if len(feature_columns) > 0 else 1 # Use 1 if no features are selected
613
  param_grid = {
614
  'n_estimators': st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the forest."),
615
  'max_depth': st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree."),
616
  'min_samples_split': st.slider("Minimum Samples Split", 2, 10, 2, help="Minimum samples required to split an internal node"), #New hyperparameter
617
  'min_samples_leaf': st.slider("Minimum Samples Leaf", 1, 10, 1, help="Minimum samples required to be at a leaf node"), #New hyperparameter
618
+
619
  }
620
 
621
  # Train-Test Split
 
626
  try:
627
  X = df[feature_columns]
628
  y = df[target_column]
629
+
630
+ # Check if X is empty
631
+ if X.empty:
632
+ st.error("No features were selected. Please select feature columns.")
633
+ st.stop()
634
+
635
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
636
 
637
  # Preprocessing Pipeline
 
680
  elif model_name == "Random Forest":
681
  if problem_type == "Regression":
682
  model = RandomForestRegressor(random_state=42)
683
+ if 'param_grid' in locals():
684
+ grid_search = GridSearchCV(model, param_grid, cv=3, scoring='neg_mean_squared_error') # Example scoring
685
+ grid_search.fit(X_train_selected, y_train)
686
+ model = grid_search.best_estimator_
687
+ st.write("Best Parameters:", grid_search.best_params_)
688
+ else:
689
+ model.fit(X_train_selected, y_train) # fit without gridsearch if param_grid is not defined.
690
+
691
  else:
692
  model = RandomForestClassifier(random_state=42)
693
+ if 'param_grid' in locals():
694
+ grid_search = GridSearchCV(model, param_grid, cv=3, scoring='accuracy')
695
+ grid_search.fit(X_train_selected, y_train)
696
+ model = grid_search.best_estimator_
697
+ st.write("Best Parameters:", grid_search.best_params_)
698
+ else:
699
+ model.fit(X_train_selected, y_train) # fit without gridsearch if param_grid is not defined
700
 
701
  elif model_name == "Gradient Boosting":
702
  model = GradientBoostingRegressor() if problem_type == "Regression" else GradientBoostingClassifier()
 
750
  st.error(f"Error loading model: {e}")
751
 
752
  #Model Evaluation Section
753
+ if 'X_test' in locals() and st.session_state.model is not None:
754
+ y_pred = st.session_state.model.predict(X_test)
755
+
756
+ if problem_type == "Regression":
757
+ mse = mean_squared_error(y_test, y_pred)
758
+ r2 = r2_score(y_test, y_pred)
759
+ st.write(f"Mean Squared Error: {mse:.4f}")
760
+ st.write(f"R-squared: {r2:.4f}")
761
+ else:
762
+ accuracy = accuracy_score(y_test, y_pred)
763
+ st.write(f"Accuracy: {accuracy:.4f}")
764
 
765
  elif app_mode == "Predictions":
766
  st.title("🔮 Make Predictions")