CChircop committed on
Commit
468c336
·
verified ·
1 Parent(s): 1ed9f98

Create Streamlit.py

Browse files
Files changed (1) hide show
  1. Streamlit.py +165 -0
Streamlit.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+ import seaborn as sns
4
+ import streamlit as st
5
+ import joblib
6
+ import matplotlib.pyplot as plt
7
+ from sklearn.preprocessing import StandardScaler
8
+ from sklearn.metrics import confusion_matrix
9
+ from xgboost import XGBClassifier
10
+
11
# Page configuration: browser-tab metadata first, then the visible heading.
# Both use the same text, so it is bound once and reused.
PAGE_TITLE = "Bank Account Prediction Dashboard"
st.set_page_config(page_title=PAGE_TITLE, page_icon="💳")
st.title(PAGE_TITLE)
14
+
15
# Load model and preprocessing objects
@st.cache_resource
def load_model_objects():
    """Load the trained classifier and its fitted preprocessing objects.

    Streamlit re-executes the whole script on every widget interaction.
    Wrapping the loader in ``st.cache_resource`` means the five joblib files
    are deserialised once and the same objects are handed back on later
    reruns instead of being read from disk again.

    Returns:
        tuple: ``(model_xgb, scaler, encoder_y, le_country_economy,
        le_regionwb)`` loaded from ``xgb_clf.joblib``, ``scaler.joblib``,
        ``encoder.joblib``, ``country_encoder.joblib`` and
        ``regionwb_encoder.joblib`` (paths relative to the working directory).
    """
    # NOTE(review): joblib.load unpickles its input, so these artefacts must
    # come from a trusted source.
    model_xgb = joblib.load('xgb_clf.joblib')
    scaler = joblib.load('scaler.joblib')
    encoder_y = joblib.load('encoder.joblib')  # For target variable
    le_country_economy = joblib.load('country_encoder.joblib')
    le_regionwb = joblib.load('regionwb_encoder.joblib')
    return model_xgb, scaler, encoder_y, le_country_economy, le_regionwb


# The scaler and target encoder are bound to underscore-prefixed module names;
# the two categorical encoders keep their le_* names.
model_xgb, _scaler, _label_encoder, le_country_economy, le_regionwb = load_model_objects()
25
+
26
@st.cache_data
def load_data():
    """Read ``micro_world_139countries.csv`` into a DataFrame.

    The result is memoised by ``st.cache_data``.

    Returns:
        pandas.DataFrame: the parsed contents of the CSV file.
    """
    csv_path = 'micro_world_139countries.csv'
    # The file is decoded as ISO-8859-1 rather than pandas' default encoding.
    frame = pd.read_csv(csv_path, encoding='ISO-8859-1')
    return frame
33
+
34
@st.cache_data
def process_data(df, _scaler, _label_encoder, _country_encoder, _regionwb_encoder,
                 sample_size=5000):
    """Turn raw rows into the scaled feature frame and encoded target.

    Steps: select the model columns, optionally down-sample, drop rows with
    missing values, encode the categorical columns, cast the boolean flags to
    integers, encode the target and scale the features.

    Sampling only happens when ``df`` has more rows than ``sample_size``.
    Previously every input was sampled with replacement to 5000 rows, so a
    single-row frame (the sidebar form) was inflated to 5000 identical rows
    before being encoded, scaled and predicted. Frames larger than
    ``sample_size`` are sampled exactly as before (same seed, with
    replacement), so the default behaviour for large frames is unchanged.

    Args:
        df: DataFrame containing at least the eleven feature columns listed
            below plus ``account``.
        _scaler: fitted scaler exposing ``transform``.
        _label_encoder: fitted encoder for the ``account`` target.
        _country_encoder: fitted encoder for the ``economy`` column.
        _regionwb_encoder: fitted encoder for the ``regionwb`` column.
        sample_size: maximum number of rows to keep; ``None`` disables
            sampling. Defaults to 5000.

    The underscore-prefixed parameters are named that way so that
    ``st.cache_data`` leaves them out of the cache key.

    Returns:
        tuple: ``(X_scaled, y)`` where ``X_scaled`` is a DataFrame of scaled
        features (fresh 0..n-1 index, original feature column names) and ``y``
        is the encoded target returned by ``_label_encoder.transform``.
    """
    feature_columns = ['remittances', 'educ', 'age', 'female', 'mobileowner',
                       'internetaccess', 'pay_utilities', 'receive_transfers',
                       'receive_pension', 'economy', 'regionwb']
    target_column = 'account'
    columns = feature_columns + [target_column]

    # Select relevant columns (list indexing yields a new frame, so the
    # caller's DataFrame is never modified).
    selected = df[columns]

    # Down-sample large inputs only; small inputs are used as they are.
    if sample_size is not None and len(selected) > sample_size:
        selected = selected.sample(n=sample_size, random_state=42, replace=True)

    # Drop rows with a missing value in any of the selected columns.
    sample_df = selected.dropna(subset=columns)

    # Encode 'economy' and 'regionwb' using the loaded encoders.
    sample_df['economy'] = _country_encoder.transform(sample_df['economy'])
    sample_df['regionwb'] = _regionwb_encoder.transform(sample_df['regionwb'])

    # Manual encodings; values missing from a mapping become -1.
    educ_mapping = {'None': 0, 'Primary': 1, 'Secondary': 2, 'Tertiary': 3}
    sample_df['educ'] = sample_df['educ'].map(educ_mapping).fillna(-1).astype(int)

    gender_mapping = {'Male': 0, 'Female': 1}
    sample_df['female'] = sample_df['female'].map(gender_mapping).fillna(-1).astype(int)

    # Convert boolean columns to integers.
    boolean_columns = ['mobileowner', 'internetaccess', 'pay_utilities',
                       'receive_transfers', 'receive_pension']
    for col in boolean_columns:
        sample_df[col] = sample_df[col].astype(int)

    # Separate features and target, then encode the target.
    X = sample_df.drop(target_column, axis=1)
    y = _label_encoder.transform(sample_df[target_column])

    # Scale features; the scaler output is re-wrapped to restore column names.
    X_scaled = pd.DataFrame(_scaler.transform(X), columns=X.columns)

    return X_scaled, y
80
+
81
# Load data
# Fetch the cached dataset and discard 'inc_q' when that column is present.
df = load_data().drop(columns='inc_q', errors='ignore')
84
+
85
# Sidebar form: collect the attributes of one person to score
with st.sidebar:
    st.title("Input User Data for Prediction")
    with st.form("user_inputs"):
        yes_no = [True, False]
        remittances = st.number_input(
            'Remittances', min_value=0, max_value=100000, step=100
        )
        educ = st.selectbox(
            'Education Level',
            options=['None', 'Primary', 'Secondary', 'Tertiary'],
        )
        age = st.number_input('Age', min_value=18, max_value=100, step=1)
        female = st.selectbox('Gender', options=['Male', 'Female'])
        mobileowner = st.radio('Owns a Mobile', options=yes_no)
        internetaccess = st.radio('Has Internet Access', options=yes_no)
        pay_utilities = st.radio('Pays Utilities Online', options=yes_no)
        receive_transfers = st.radio('Receives Transfers', options=yes_no)
        receive_pension = st.radio('Receives Pension', options=yes_no)
        # Country and region choices come from the fitted encoders' classes.
        economy = st.selectbox('Country', options=list(le_country_economy.classes_))
        regionwb = st.selectbox('Region', options=list(le_regionwb.classes_))
        # Placeholder target value: process_data selects an 'account' column.
        account = 1
        submit_button = st.form_submit_button("Predict")

# Score the submitted values and report the outcome in the sidebar
if submit_button:
    form_values = {
        'remittances': remittances,
        'educ': educ,
        'age': age,
        'female': female,
        'mobileowner': mobileowner,
        'internetaccess': internetaccess,
        'pay_utilities': pay_utilities,
        'receive_transfers': receive_transfers,
        'receive_pension': receive_pension,
        'economy': economy,
        'regionwb': regionwb,
        'account': account,
    }
    # One-row frame: each column holds a single-element list.
    user_data = pd.DataFrame({name: [value] for name, value in form_values.items()})

    try:
        processed_user_data, _ = process_data(
            user_data, _scaler, _label_encoder, le_country_economy, le_regionwb
        )
        prediction = model_xgb.predict(processed_user_data)
        if prediction[0] == 1:
            result = 'Has Bank Account'
        else:
            result = 'Does Not Have Bank Account'
        st.sidebar.write(f'Prediction: {result}')
    except Exception as e:
        # UI boundary: show the failure in the sidebar rather than crash the page.
        st.sidebar.error(f"Error in processing data: {e}")
130
+
131
# Process the dataset once; the table below and the evaluation share the result.
# process_data either returns a (features, target) pair or raises, so no
# None-check is needed before using it.
processed_data, y_main = process_data(
    df, _scaler, _label_encoder, le_country_economy, le_regionwb
)
scaled_data = processed_data

# Display the processed data in the app
st.write("Scaled Data:", scaled_data)

# Main prediction logic
X = processed_data  # 'account' has been dropped in process_data
y = y_main

# Make predictions
predictions = model_xgb.predict(X)

# Show predictions
st.write("Predictions:")
st.write(predictions)

# Plotting a confusion matrix
st.subheader("Confusion Matrix")
cm = confusion_matrix(y, predictions)
cm_fig, ax = plt.subplots()
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
# Label the explicit Axes instead of going through pyplot's implicit
# "current axes" global state.
ax.set_ylabel('Actual')
ax.set_xlabel('Predicted')
st.pyplot(cm_fig)
# Release the figure: the script runs again on every interaction and pyplot
# keeps every figure alive until it is closed.
plt.close(cm_fig)

# Feature importance
if st.button('Show Feature Importances'):
    feat_importances = pd.Series(model_xgb.feature_importances_, index=X.columns)
    st.bar_chart(feat_importances)