Spaces:

CChircop
/

StreamlitAppAssignment4

Sleeping

App Files Files Community

CChircop committed on Oct 1, 2024

Commit

468c336

verified ·

1 Parent(s): 1ed9f98

Create Streamlit.py

Browse files

Files changed (1) hide show

Streamlit.py +165 -0

Streamlit.py ADDED Viewed

	@@ -0,0 +1,165 @@

+import numpy as np
+import pandas as pd
+import seaborn as sns
+import streamlit as st
+import joblib
+import matplotlib.pyplot as plt
+from sklearn.preprocessing import StandardScaler
+from sklearn.metrics import confusion_matrix
+from xgboost import XGBClassifier
+# Page configuration: set the page title and icon, then render the on-page
+# heading. In this script these are the first st.* calls that run.
+st.set_page_config(page_title="Bank Account Prediction Dashboard", page_icon="💳")
+st.title('Bank Account Prediction Dashboard')
+# Load model and preprocessing objects
+def load_model_objects():
+    model_xgb = joblib.load('xgb_clf.joblib')
+    scaler = joblib.load('scaler.joblib')
+    encoder_y = joblib.load('encoder.joblib')  # For target variable
+    le_country_economy = joblib.load('country_encoder.joblib')
+    le_regionwb = joblib.load('regionwb_encoder.joblib')
+    return model_xgb, scaler, encoder_y, le_country_economy, le_regionwb
+# Unpack the loaded artefacts into the module-level names used by the rest of
+# the script; the target encoder is bound to `_label_encoder` here.
+model_xgb, _scaler, _label_encoder, le_country_economy, le_regionwb = load_model_objects()
+@st.cache_data
+def load_data():
+    # Load the actual data from the CSV file
+    return pd.read_csv(
+        'micro_world_139countries.csv',
+        encoding='ISO-8859-1'
+    )
+@st.cache_data
+def process_data(df, _scaler, _label_encoder, _country_encoder, _regionwb_encoder):
+    # Select relevant columns and sample
+    sample_df = df[['remittances', 'educ', 'age', 'female', 'mobileowner',
+                   'internetaccess', 'pay_utilities', 'receive_transfers',
+                   'receive_pension', 'economy', 'regionwb', 'account']].sample(
+                   n=5000, random_state=42, replace=True)
+    # Drop rows with missing values in specified columns
+    sample_df = sample_df.dropna(subset=['account', 'remittances', 'educ', 'age', 'female',
+                                         'mobileowner', 'internetaccess', 'pay_utilities',
+                                         'receive_transfers', 'receive_pension',
+                                         'economy', 'regionwb'])
+    # Encode 'economy' using the loaded LabelEncoder
+    sample_df['economy'] = _country_encoder.transform(sample_df['economy'])
+    # Encode 'regionwb' using the loaded LabelEncoder
+    sample_df['regionwb'] = _regionwb_encoder.transform(sample_df['regionwb'])
+    # Manual encoding for 'educ'
+    educ_mapping = {'None': 0, 'Primary': 1, 'Secondary': 2, 'Tertiary': 3}
+    sample_df['educ'] = sample_df['educ'].map(educ_mapping).fillna(-1).astype(int)
+    # Manual encoding for 'female'
+    gender_mapping = {'Male': 0, 'Female': 1}
+    sample_df['female'] = sample_df['female'].map(gender_mapping).fillna(-1).astype(int)
+    # Convert boolean columns to integers
+    boolean_columns = ['mobileowner', 'internetaccess', 'pay_utilities',
+                       'receive_transfers', 'receive_pension']
+    for col in boolean_columns:
+        sample_df[col] = sample_df[col].astype(int)
+    # Separate features and target
+    X = sample_df.drop('account', axis=1)
+    y = sample_df['account']
+    # Encode target variable
+    y = _label_encoder.transform(y)
+    # Scale features using the loaded scaler
+    X_scaled = _scaler.transform(X)
+    X_scaled = pd.DataFrame(X_scaled, columns=X.columns)
+    return X_scaled, y
+# Load data
+df = load_data()
+df = df.drop('inc_q', axis=1, errors='ignore')  # Ensure 'inc_q' is dropped if it exists
+# Adding a sidebar for user input
+# The form collects one value per model feature; the bound names are read by
+# the prediction step once the form has been submitted.
+with st.sidebar:
+    st.title("Input User Data for Prediction")
+    with st.form("user_inputs"):
+        remittances = st.number_input('Remittances', min_value=0, max_value=100000, step=100)
+        # Option labels match the keys of the educ/gender mappings in process_data.
+        educ = st.selectbox('Education Level', options=['None', 'Primary', 'Secondary', 'Tertiary'])
+        age = st.number_input('Age', min_value=18, max_value=100, step=1)
+        female = st.selectbox('Gender', options=['Male', 'Female'])
+        mobileowner = st.radio('Owns a Mobile', options=[True, False])
+        internetaccess = st.radio('Has Internet Access', options=[True, False])
+        pay_utilities = st.radio('Pays Utilities Online', options=[True, False])
+        receive_transfers = st.radio('Receives Transfers', options=[True, False])
+        receive_pension = st.radio('Receives Pension', options=[True, False])
+        # Offer only labels the fitted encoders know.
+        economy = st.selectbox('Country', options=list(le_country_economy.classes_))
+        regionwb = st.selectbox('Region', options=list(le_regionwb.classes_))
+        # process_data selects and encodes an 'account' column, so the input
+        # row carries a dummy value for it.
+        account = 1  # Placeholder or default value
+        submit_button = st.form_submit_button("Predict")
+# Processing user input for prediction
+if submit_button:
+    user_data = pd.DataFrame({
+        'remittances': [remittances],
+        'educ': [educ],
+        'age': [age],
+        'female': [female],
+        'mobileowner': [mobileowner],
+        'internetaccess': [internetaccess],
+        'pay_utilities': [pay_utilities],
+        'receive_transfers': [receive_transfers],
+        'receive_pension': [receive_pension],
+        'economy': [economy],
+        'regionwb': [regionwb],
+        'account': [account]
+    })
+    try:
+        processed_user_data, _ = process_data(
+            user_data, _scaler, _label_encoder, le_country_economy, le_regionwb
+        )
+        prediction = model_xgb.predict(processed_user_data)
+        result = 'Has Bank Account' if prediction[0] == 1 else 'Does Not Have Bank Account'
+        st.sidebar.write(f'Prediction: {result}')
+    except Exception as e:
+        st.sidebar.error(f"Error in processing data: {e}")
+# Process example data
+# Only the scaled feature frame is kept here; the encoded target is discarded.
+scaled_data, _ = process_data(df, _scaler, _label_encoder, le_country_economy, le_regionwb)
+# Display the processed data in your Streamlit app
+# NOTE(review): process_data returns a DataFrame on every path visible in this
+# file, so this check looks like it is always true — confirm before removing.
+if scaled_data is not None:
+    st.write("Scaled Data:", scaled_data)
+# Main prediction logic
+# Process the main dataset for predictions
+processed_data, y_main = process_data(df, _scaler, _label_encoder, le_country_economy, le_regionwb)
+if processed_data is not None:
+    # Prepare features for prediction
+    X = processed_data  # 'account' has been dropped in process_data
+    y = y_main
+    # Make predictions
+    predictions = model_xgb.predict(X)
+    # Show predictions
+    st.write("Predictions:")
+    st.write(predictions)
+    # Plotting a confusion matrix
+    st.subheader("Confusion Matrix")
+    cm = confusion_matrix(y, predictions)
+    cm_fig, ax = plt.subplots()
+    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
+    plt.ylabel('Actual')
+    plt.xlabel('Predicted')
+    st.pyplot(cm_fig)
+    # Feature importance
+    if st.button('Show Feature Importances'):
+        feat_importances = pd.Series(model_xgb.feature_importances_, index=X.columns)
+        st.bar_chart(feat_importances)