CosmickVisions committed on
Commit
48d1da7
·
verified ·
1 Parent(s): b72ed9a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -45
app.py CHANGED
@@ -3,6 +3,15 @@ import pandas as pd
3
  import numpy as np
4
  import plotly.express as px
5
  import plotly.graph_objects as go
 
 
 
 
 
 
 
 
 
6
  import matplotlib.pyplot as plt #For SHAP charts
7
  from scipy.stats import pearsonr, spearmanr
8
  from sklearn.inspection import permutation_importance
@@ -356,20 +365,20 @@ if app_mode == "Data Upload":
356
  # --------------------------
357
  elif app_mode == "Data Cleaning":
358
  st.title("🧹 Smart Data Cleaning")
359
-
360
- if st.session_state.raw_data is None:
 
361
  st.warning("Please upload data first")
362
  st.stop()
363
-
364
- # Use cleaned_data as the base dataframe
365
- df = st.session_state.cleaned_data.copy() # Changed line
366
-
367
- # Initialize session state
368
  if 'data_versions' not in st.session_state:
369
  st.session_state.data_versions = [st.session_state.raw_data.copy()]
370
  if 'cleaned_data' not in st.session_state:
371
  st.session_state.cleaned_data = st.session_state.raw_data.copy()
372
- st.dataframe(st.session_state.cleaned_data.head(), use_container_width=True, height=300)
 
 
373
 
374
  # --------------------------
375
  # Data Health Dashboard
@@ -394,11 +403,12 @@ elif app_mode == "Data Cleaning":
394
  # --------------------------
395
  # Undo Functionality
396
  # --------------------------
397
- if len(st.session_state.data_versions) > 1:
 
398
  if st.button("⏮️ Undo Last Action"):
399
- st.session_state.data_versions.pop() # Remove current version
400
- st.session_state.cleaned_data = st.session_state.data_versions[-1].copy() # Set data
401
- st.success("Last action undone!")
402
 
403
  # --------------------------
404
  # Missing Value Handling
@@ -439,7 +449,8 @@ elif app_mode == "Data Cleaning":
439
  new_df[cols] = new_df[cols].bfill()
440
 
441
  update_cleaned_data(new_df)
442
-
 
443
  except Exception as e:
444
  st.error(f"Error: {str(e)}")
445
  else:
@@ -480,6 +491,7 @@ elif app_mode == "Data Cleaning":
480
  new_df[col_to_convert] = pd.to_datetime(new_df[col_to_convert], format=date_format, errors='coerce')
481
 
482
  update_cleaned_data(new_df)
 
483
  except Exception as e:
484
  st.error(f"Error: {str(e)}")
485
 
@@ -495,6 +507,7 @@ elif app_mode == "Data Cleaning":
495
  new_df = df.copy()
496
  new_df = new_df.drop(columns=columns_to_drop)
497
  update_cleaned_data(new_df)
 
498
 
499
  # --------------------------
500
  # Label Encoding
@@ -511,6 +524,7 @@ elif app_mode == "Data Cleaning":
511
  new_df[col] = le.fit_transform(new_df[col].astype(str))
512
  label_encoders[col] = le
513
  update_cleaned_data(new_df)
 
514
 
515
  # --------------------------
516
  # StandardScaler
@@ -525,6 +539,7 @@ elif app_mode == "Data Cleaning":
525
  scaler = StandardScaler()
526
  new_df[scale_cols] = scaler.fit_transform(new_df[scale_cols])
527
  update_cleaned_data(new_df)
 
528
  except Exception as e:
529
  st.error(f"Error: {str(e)}")
530
 
@@ -558,6 +573,7 @@ elif app_mode == "Data Cleaning":
558
  text_cols = new_df.select_dtypes(include='object').columns
559
  new_df[text_cols] = new_df[text_cols].apply(lambda x: x.str.strip())
560
  update_cleaned_data(new_df)
 
561
 
562
  # --------------------------
563
  # Cleaned Data Preview
@@ -565,11 +581,10 @@ elif app_mode == "Data Cleaning":
565
  if st.session_state.get("cleaned_data") is not None:
566
  enhance_section_title("Cleaned Data Preview", "✨")
567
  with st.expander("✨ Cleaned Data Preview", expanded=True):
568
- st.dataframe(
569
- st.session_state.cleaned_data.head(),
570
- use_container_width=True,
571
- height=400 # <-- Add height parameter here
572
- )
573
 
574
  # --------------------------
575
  # EDA
@@ -577,11 +592,31 @@ elif app_mode == "Data Cleaning":
577
  elif app_mode == "EDA":
578
  st.title("🔍 Interactive Data Explorer")
579
 
580
- if st.session_state.cleaned_data is None:
581
- st.warning("Please clean your data first")
582
- st.stop()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
583
 
584
- df = st.session_state.cleaned_data
 
585
 
586
  # --------------------------
587
  # Enhanced Data Overview
@@ -910,24 +945,32 @@ elif app_mode == "EDA":
910
  elif app_mode == "Model Training":
911
  st.title("🤖 Intelligent Model Training")
912
 
913
- if st.session_state.get("cleaned_data") is None:
914
- st.warning("Please clean your data first")
915
- # Show Upload Clean Data button
916
- uploaded_clean_file = st.file_uploader("Upload your cleaned dataset (CSV/XLSX)", type=["csv", "xlsx"])
917
- if uploaded_clean_file:
918
- try:
919
- if uploaded_clean_file.name.endswith('.csv'):
920
- df = pd.read_csv(uploaded_clean_file)
921
- else:
922
- df = pd.read_excel(uploaded_clean_file)
923
- st.session_state.cleaned_data = df
924
- st.success("Cleaned data uploaded successfully!")
925
- except Exception as e:
926
- st.error(f"Error loading file: {str(e)}")
927
- st.stop()
928
-
929
- df = st.session_state.cleaned_data
 
 
 
 
 
930
 
 
 
 
931
  # Model Setup
932
  col1, col2, col3 = st.columns(3)
933
  with col1:
@@ -996,16 +1039,21 @@ elif app_mode == "Model Training":
996
 
997
  use_grid_search = st.checkbox("Use Grid Search for Hyperparameter Tuning")
998
 
 
999
  if st.button("Train Model"):
1000
  if not features:
1001
  st.error("Please select at least one feature.")
1002
  st.stop()
1003
-
 
1004
  # Call the training function
1005
- model, scaler, label_encoder, imputer_numerical, metrics, column_order, importance = train_model(df.copy(), target, features, problem_type, test_size, model_type, model_params, use_grid_search) # Pass a copy to avoid modifying the original
 
 
1006
 
1007
- if model: # Only proceed if training was successful
1008
- st.success("Model trained successfully!")
 
1009
 
1010
  # Display Metrics
1011
  st.subheader("Model Evaluation Metrics")
@@ -1109,10 +1157,22 @@ elif app_mode == "Model Training":
1109
  # Predictions Section (Fixed)
1110
  if app_mode == "Predictions":
1111
  st.title("�� Predictive Analytics - Informed Business Decisions")
 
 
 
 
 
 
 
 
 
 
1112
 
1113
- if st.session_state.get("model") is None:
1114
- st.warning("Please train a model first")
1115
  st.stop()
 
 
1116
 
1117
  model_data = st.session_state.model # Get the entire dictionary
1118
  model = model_data['model'] # Access model
 
3
  import numpy as np
4
  import plotly.express as px
5
  import plotly.graph_objects as go
6
+ from sklearn.impute import SimpleImputer
7
+ from sklearn.model_selection import GridSearchCV
8
+ from sklearn.linear_model import LogisticRegression
9
+ from sklearn.svm import SVC
10
+ from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier
11
+ from sklearn.neural_network import MLPRegressor, MLPClassifier
12
+ from sklearn.metrics import confusion_matrix, classification_report, r2_score
13
+ from sklearn.model_selection import cross_val_score
14
+ import scipy.stats as stats
15
  import matplotlib.pyplot as plt #For SHAP charts
16
  from scipy.stats import pearsonr, spearmanr
17
  from sklearn.inspection import permutation_importance
 
365
  # --------------------------
366
  elif app_mode == "Data Cleaning":
367
  st.title("🧹 Smart Data Cleaning")
368
+
369
+ # Check for raw data FIRST
370
+ if 'raw_data' not in st.session_state:
371
  st.warning("Please upload data first")
372
  st.stop()
373
+
374
+ # Initialize data_versions and cleaned_data together
 
 
 
375
  if 'data_versions' not in st.session_state:
376
  st.session_state.data_versions = [st.session_state.raw_data.copy()]
377
  if 'cleaned_data' not in st.session_state:
378
  st.session_state.cleaned_data = st.session_state.raw_data.copy()
379
+
380
+ # Now safely use cleaned_data
381
+ df = st.session_state.cleaned_data.copy()
382
 
383
  # --------------------------
384
  # Data Health Dashboard
 
403
  # --------------------------
404
  # Undo Functionality
405
  # --------------------------
406
+ # Offer undo only when an earlier data version exists to roll back to
407
+ if 'data_versions' in st.session_state and len(st.session_state.data_versions) > 1:
408
  if st.button("⏮️ Undo Last Action"):
409
+ st.session_state.data_versions.pop()
410
+ st.session_state.cleaned_data = st.session_state.data_versions[-1].copy()
411
+ st.rerun()
412
 
413
  # --------------------------
414
  # Missing Value Handling
 
449
  new_df[cols] = new_df[cols].bfill()
450
 
451
  update_cleaned_data(new_df)
452
+ st.rerun() #Force re-run after apply
453
+
454
  except Exception as e:
455
  st.error(f"Error: {str(e)}")
456
  else:
 
491
  new_df[col_to_convert] = pd.to_datetime(new_df[col_to_convert], format=date_format, errors='coerce')
492
 
493
  update_cleaned_data(new_df)
494
+ st.rerun() #Force re-run after apply
495
  except Exception as e:
496
  st.error(f"Error: {str(e)}")
497
 
 
507
  new_df = df.copy()
508
  new_df = new_df.drop(columns=columns_to_drop)
509
  update_cleaned_data(new_df)
510
+ st.rerun() #Force re-run after apply
511
 
512
  # --------------------------
513
  # Label Encoding
 
524
  new_df[col] = le.fit_transform(new_df[col].astype(str))
525
  label_encoders[col] = le
526
  update_cleaned_data(new_df)
527
+ st.rerun() #Force re-run after apply
528
 
529
  # --------------------------
530
  # StandardScaler
 
539
  scaler = StandardScaler()
540
  new_df[scale_cols] = scaler.fit_transform(new_df[scale_cols])
541
  update_cleaned_data(new_df)
542
+ st.rerun()#Force re-run after apply
543
  except Exception as e:
544
  st.error(f"Error: {str(e)}")
545
 
 
573
  text_cols = new_df.select_dtypes(include='object').columns
574
  new_df[text_cols] = new_df[text_cols].apply(lambda x: x.str.strip())
575
  update_cleaned_data(new_df)
576
+ st.rerun() #Force re-run after apply
577
 
578
  # --------------------------
579
  # Cleaned Data Preview
 
581
  if st.session_state.get("cleaned_data") is not None:
582
  enhance_section_title("Cleaned Data Preview", "✨")
583
  with st.expander("✨ Cleaned Data Preview", expanded=True):
584
+ st.dataframe(st.session_state.cleaned_data.head(), use_container_width=True)
585
+
586
+
587
+
 
588
 
589
  # --------------------------
590
  # EDA
 
592
  elif app_mode == "EDA":
593
  st.title("🔍 Interactive Data Explorer")
594
 
595
+ # Universal check for all dependent pages
596
+ if 'cleaned_data' not in st.session_state:
597
+ st.warning("No cleaned data found! Please either:")
598
+
599
+ col1, col2 = st.columns(2)
600
+ with col1:
601
+ if st.button("↩️ Go to Data Cleaning"):
602
+ st.session_state.app_mode = "Data Cleaning"
603
+ st.experimental_rerun()
604
+
605
+ with col2:
606
+ uploaded_clean = st.file_uploader("📤 Or upload clean data",
607
+ type=["csv", "xlsx"])
608
+ if uploaded_clean:
609
+ try:
610
+ st.session_state.cleaned_data = pd.read_csv(uploaded_clean)
611
+ st.success("Loaded clean data!")
612
+ st.experimental_rerun()
613
+ except Exception as e:
614
+ st.error(f"Invalid file: {str(e)}")
615
+
616
+ st.stop() # Halt execution until resolved
617
 
618
+ # Only reaches here if cleaned_data exists
619
+ df = st.session_state.cleaned_data.copy()
620
 
621
  # --------------------------
622
  # Enhanced Data Overview
 
945
  elif app_mode == "Model Training":
946
  st.title("🤖 Intelligent Model Training")
947
 
948
+ # Universal check for all dependent pages
949
+ if 'cleaned_data' not in st.session_state:
950
+ st.warning("No cleaned data found! Please either:")
951
+
952
+ col1, col2 = st.columns(2)
953
+ with col1:
954
+ if st.button("↩️ Go to Data Cleaning"):
955
+ st.session_state.app_mode = "Data Cleaning"
956
+ st.experimental_rerun()
957
+
958
+ with col2:
959
+ uploaded_clean = st.file_uploader("📤 Or upload clean data",
960
+ type=["csv", "xlsx"])
961
+ if uploaded_clean:
962
+ try:
963
+ st.session_state.cleaned_data = pd.read_csv(uploaded_clean)
964
+ st.success("Loaded clean data!")
965
+ st.experimental_rerun()
966
+ except Exception as e:
967
+ st.error(f"Invalid file: {str(e)}")
968
+
969
+ st.stop() # Halt execution until resolved
970
 
971
+ # Only reaches here if cleaned_data exists
972
+ df = st.session_state.cleaned_data.copy()
973
+
974
  # Model Setup
975
  col1, col2, col3 = st.columns(3)
976
  with col1:
 
1039
 
1040
  use_grid_search = st.checkbox("Use Grid Search for Hyperparameter Tuning")
1041
 
1042
+ # Training starts only when the button is clicked and at least one feature is selected
1043
  if st.button("Train Model"):
1044
  if not features:
1045
  st.error("Please select at least one feature.")
1046
  st.stop()
1047
+
1048
+ # NOTE(review): the training call and result display below are meant to sit inside the button-click branch - confirm indentation
1049
  # Call the training function
1050
+ model, scaler, label_encoder, imputer_numerical, metrics, column_order, importance, X_train, y_train = train_model(
1051
+ df.copy(), target, features, problem_type, test_size, model_type, model_params, use_grid_search
1052
+ )
1053
 
1054
+ if model: # Only proceed if training was successful
1055
+ st.success("Model trained successfully!")
1056
+ # The evaluation metrics for the trained model are displayed below
1057
 
1058
  # Display Metrics
1059
  st.subheader("Model Evaluation Metrics")
 
1157
  # Predictions Section (Fixed)
1158
  if app_mode == "Predictions":
1159
  st.title("�� Predictive Analytics - Informed Business Decisions")
1160
+ st.warning("Note: SHAP explanations currently work best with tree-based models like Random Forest")
1161
+
1162
+ # Add model upload section
1163
+ uploaded_model = st.file_uploader("Upload trained model", type="joblib")
1164
+ if uploaded_model:
1165
+ try:
1166
+ st.session_state.model = joblib.load(uploaded_model)
1167
+ st.success("Model loaded successfully!")
1168
+ except:
1169
+ st.error("Invalid model file")
1170
 
1171
+ if 'model' not in st.session_state:
1172
+ st.warning("Please load a trained model first")
1173
  st.stop()
1174
+
1175
+ # Unpack the stored model dictionary from session state for use in predictions
1176
 
1177
  model_data = st.session_state.model # Get the entire dictionary
1178
  model = model_data['model'] # Access model