Spencer525 committed on
Commit
e0263ce
·
verified ·
1 Parent(s): f1e5a35

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -45
app.py CHANGED
@@ -4,13 +4,12 @@ import numpy as np
4
  import matplotlib.pyplot as plt
5
  import seaborn as sns
6
  from sklearn.model_selection import train_test_split
7
- from sklearn.preprocessing import StandardScaler
8
  from sklearn.tree import DecisionTreeClassifier
9
  from sklearn.ensemble import RandomForestClassifier
10
  from xgboost import XGBClassifier
11
  from sklearn.inspection import permutation_importance
12
  from sklearn.feature_selection import mutual_info_classif
13
- from sklearn.preprocessing import LabelEncoder
14
  import io
15
  import base64
16
 
@@ -65,39 +64,10 @@ def calculate_feature_importance(X, y):
65
 
66
  return importance_dict
67
 
68
- # Example of usage in the main script
69
- # After uploading the file and selecting the target column, run the analysis
70
- if uploaded_file is not None:
71
- data = pd.read_csv(uploaded_file)
72
- st.write("Data Preview:")
73
- st.write(data.head())
74
-
75
- # Select target variable
76
- target_col = st.selectbox("Select the target variable", data.columns)
77
-
78
- if st.button('Analyze'):
79
- X = data.drop(target_col, axis=1)
80
- y = data[target_col]
81
-
82
- # Ensure that `y` has continuous integer values for classification
83
- st.write("Original Target Values:", y.unique()) # Show original target values for debugging
84
-
85
- # Correlation Matrix
86
- st.subheader('Correlation Matrix')
87
- plot_correlation_matrix(data)
88
-
89
- # Feature Importance
90
- st.subheader('Feature Importance')
91
- importance_dict = calculate_feature_importance(X, y)
92
-
93
- # Create a DataFrame with all feature importances
94
- importance_df = pd.DataFrame(importance_dict, index=X.columns)
95
- st.write(importance_df)
96
-
97
  # Streamlit app
98
  st.title('Heart Disease Feature Analysis')
99
 
100
- # File upload
101
  uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
102
 
103
  if uploaded_file is not None:
@@ -112,16 +82,13 @@ if uploaded_file is not None:
112
  X = data.drop(target_col, axis=1)
113
  y = data[target_col]
114
 
 
 
 
115
  # Correlation Matrix
116
  st.subheader('Correlation Matrix')
117
  plot_correlation_matrix(data)
118
 
119
- # Download correlation matrix as PNG
120
- buf = io.BytesIO()
121
- plt.savefig(buf, format='png')
122
- buf.seek(0)
123
- st.markdown(get_download_link(buf.getvalue(), "correlation_matrix.png", "Download Correlation Matrix as PNG"), unsafe_allow_html=True)
124
-
125
  # Feature Importance
126
  st.subheader('Feature Importance')
127
  importance_dict = calculate_feature_importance(X, y)
@@ -130,12 +97,5 @@ if uploaded_file is not None:
130
  importance_df = pd.DataFrame(importance_dict, index=X.columns)
131
  st.write(importance_df)
132
 
133
- # Download feature importance as XLSX
134
- excel_buffer = io.BytesIO()
135
- with pd.ExcelWriter(excel_buffer, engine='xlsxwriter') as writer:
136
- importance_df.to_excel(writer, sheet_name='Feature Importance')
137
- excel_buffer.seek(0)
138
- st.markdown(get_download_link(excel_buffer.getvalue(), "feature_importance.xlsx", "Download Feature Importance as XLSX"), unsafe_allow_html=True)
139
-
140
  else:
141
  st.write("Please upload a CSV file to begin the analysis.")
 
4
  import matplotlib.pyplot as plt
5
  import seaborn as sns
6
  from sklearn.model_selection import train_test_split
7
+ from sklearn.preprocessing import StandardScaler, LabelEncoder
8
  from sklearn.tree import DecisionTreeClassifier
9
  from sklearn.ensemble import RandomForestClassifier
10
  from xgboost import XGBClassifier
11
  from sklearn.inspection import permutation_importance
12
  from sklearn.feature_selection import mutual_info_classif
 
13
  import io
14
  import base64
15
 
 
64
 
65
  return importance_dict
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  # Streamlit app
68
  st.title('Heart Disease Feature Analysis')
69
 
70
+ # File upload (this line defines `uploaded_file`)
71
  uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
72
 
73
  if uploaded_file is not None:
 
82
  X = data.drop(target_col, axis=1)
83
  y = data[target_col]
84
 
85
+ # Ensure that `y` has continuous integer values for classification
86
+ st.write("Original Target Values:", y.unique()) # Show original target values for debugging
87
+
88
  # Correlation Matrix
89
  st.subheader('Correlation Matrix')
90
  plot_correlation_matrix(data)
91
 
 
 
 
 
 
 
92
  # Feature Importance
93
  st.subheader('Feature Importance')
94
  importance_dict = calculate_feature_importance(X, y)
 
97
  importance_df = pd.DataFrame(importance_dict, index=X.columns)
98
  st.write(importance_df)
99
 
 
 
 
 
 
 
 
100
  else:
101
  st.write("Please upload a CSV file to begin the analysis.")