Spaces:

Johan713
/

Advanced-Loan-Predictor

Runtime error

App Files Files Community

Johan713 committed on Aug 23, 2024

Commit

0b7ab28

verified ·

1 Parent(s): c6131e7

Update app.py

Browse files

Files changed (1) hide show

app.py +197 -192

app.py CHANGED Viewed

@@ -1,193 +1,198 @@
-import streamlit as st
-import pandas as pd
-import numpy as np
-import plotly.express as px
-import plotly.graph_objects as go
-from sklearn.model_selection import train_test_split
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.preprocessing import StandardScaler
-@st.cache_data
-def load_and_preprocess_data():
-    data = pd.read_csv('train.csv')
-    data['Gender'].fillna(data['Gender'].mode()[0], inplace=True)
-    data['Married'].fillna(data['Married'].mode()[0], inplace=True)
-    data['Dependents'].fillna(data['Dependents'].mode()[0], inplace=True)
-    data['Self_Employed'].fillna(data['Self_Employed'].mode()[0], inplace=True)
-    data['LoanAmount'].fillna(data['LoanAmount'].median(), inplace=True)
-    data['Loan_Amount_Term'].fillna(data['Loan_Amount_Term'].mode()[0], inplace=True)
-    data['Credit_History'].fillna(data['Credit_History'].mode()[0], inplace=True)
-    data['Dependents'] = data['Dependents'].replace('3+', '3').astype(int)
-    data['LoanAmount'] = np.log1p(data['LoanAmount'])
-    data['ApplicantIncome'] = np.log1p(data['ApplicantIncome'])
-    data['CoapplicantIncome'] = np.log1p(data['CoapplicantIncome'])
-    return data
-@st.cache_resource
-def get_model(data):
-    # Prepare the data
-    X = data.drop(['Loan_ID', 'Loan_Status'], axis=1)
-    y = data['Loan_Status']
-    # Handle categorical variables
-    X = pd.get_dummies(X, drop_first=True)
-    # Store feature names
-    feature_names = X.columns.tolist()
-    # Split the data
-    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
-    # Scale the features
-    scaler = StandardScaler()
-    X_train_scaled = scaler.fit_transform(X_train)
-    X_test_scaled = scaler.transform(X_test)
-    # Train the model
-    model = RandomForestClassifier(n_estimators=100, random_state=42)
-    model.fit(X_train_scaled, y_train)
-    return model, scaler, feature_names
-def predict_loan_approval(model, scaler, feature_names, input_data):
-    input_df = pd.DataFrame([input_data])
-    input_df = pd.get_dummies(input_df, drop_first=True)
-    for col in feature_names:
-        if col not in input_df.columns:
-            input_df[col] = 0
-    input_df = input_df.reindex(columns=feature_names, fill_value=0)
-    input_scaled = scaler.transform(input_df)
-    prediction = model.predict(input_scaled)
-    probability = model.predict_proba(input_scaled)[0][1]
-    return prediction[0], probability
-# Streamlit app
-def main():
-    st.set_page_config(page_title="Loan Approval Predictor", layout="wide")
-    # Sidebar
-    st.sidebar.title("Navigation")
-    page = st.sidebar.radio("Go to", ["Predict", "Explore Data"])
-    # Load data and model
-    data = load_and_preprocess_data()
-    model, scaler, feature_names = get_model(data)
-    if page == "Predict":
-        st.title("Loan Approval Predictor")
-        st.write("Fill in the details below to predict your loan approval chances.")
-        col1, col2, col3 = st.columns(3)
-        with col1:
-            gender = st.selectbox("Gender", ["Male", "Female"])
-            married = st.selectbox("Married", ["Yes", "No"])
-            dependents = st.selectbox("Dependents", ["0", "1", "2", "3+"])
-            education = st.selectbox("Education", ["Graduate", "Not Graduate"])
-        with col2:
-            self_employed = st.selectbox("Self Employed", ["Yes", "No"])
-            applicant_income = st.number_input("Applicant Income", min_value=0)
-            coapplicant_income = st.number_input("Coapplicant Income", min_value=0)
-            loan_amount = st.number_input("Loan Amount", min_value=0)
-        with col3:
-            loan_amount_term = st.number_input("Loan Amount Term (in months)", min_value=0)
-            credit_history = st.selectbox("Credit History", [0, 1])
-            property_area = st.selectbox("Property Area", ["Urban", "Semiurban", "Rural"])
-        if st.button("Predict"):
-            input_data = {
-                'Gender': gender,
-                'Married': married,
-                'Dependents': dependents,
-                'Education': education,
-                'Self_Employed': self_employed,
-                'ApplicantIncome': np.log1p(applicant_income),
-                'CoapplicantIncome': np.log1p(coapplicant_income),
-                'LoanAmount': np.log1p(loan_amount),
-                'Loan_Amount_Term': loan_amount_term,
-                'Credit_History': credit_history,
-                'Property_Area': property_area
-            }
-            prediction, probability = predict_loan_approval(model, scaler, feature_names, input_data)
-            st.subheader("Prediction Result")
-            if prediction == 'Y':
-                st.success(f"Congratulations! Your loan is likely to be approved with a {probability:.2%} chance.")
-            else:
-                st.error(f"Sorry, your loan is likely to be rejected. The approval chance is {probability:.2%}.")
-            # Visualization of prediction probability
-            fig = go.Figure(go.Indicator(
-                mode = "gauge+number",
-                value = probability * 100,
-                domain = {'x': [0, 1], 'y': [0, 1]},
-                title = {'text': "Approval Probability"},
-                gauge = {
-                    'axis': {'range': [0, 100]},
-                    'bar': {'color': "darkblue"},
-                    'steps': [
-                        {'range': [0, 50], 'color': "lightgray"},
-                        {'range': [50, 75], 'color': "gray"},
-                        {'range': [75, 100], 'color': "darkgray"}
-                    ],
-                    'threshold': {
-                        'line': {'color': "red", 'width': 4},
-                        'thickness': 0.75,
-                        'value': 50
-                    }
-                }
-            ))
-            st.plotly_chart(fig)
-    elif page == "Explore Data":
-        st.title("Explore Loan Application Data")
-        # Data overview
-        st.subheader("Data Overview")
-        st.write(data.head())
-        st.write(f"Total number of records: {len(data)}")
-        # Loan Status Distribution
-        st.subheader("Loan Status Distribution")
-        fig = px.pie(data, names='Loan_Status', title='Loan Status Distribution', hole=0.3,
-                     color_discrete_sequence=px.colors.sequential.RdBu)
-        st.plotly_chart(fig)
-        # Correlation Heatmap
-        st.subheader("Correlation Heatmap")
-        numeric_cols = data.select_dtypes(include=[np.number]).columns
-        corr_matrix = data[numeric_cols].corr()
-        fig = px.imshow(corr_matrix, text_auto=True, aspect="auto", color_continuous_scale='RdBu')
-        st.plotly_chart(fig)
-        # Loan Amount Distribution
-        st.subheader("Loan Amount Distribution")
-        fig = px.histogram(data, x="LoanAmount", nbins=50, title="Loan Amount Distribution",
-                           color="Loan_Status", color_discrete_sequence=px.colors.sequential.RdBu)
-        st.plotly_chart(fig)
-        # Applicant Income vs Loan Amount
-        st.subheader("Applicant Income vs Loan Amount")
-        fig = px.scatter(data, x="ApplicantIncome", y="LoanAmount", color="Loan_Status",
-                         title="Applicant Income vs Loan Amount",
-                         color_discrete_sequence=px.colors.sequential.RdBu)
-        st.plotly_chart(fig)
-        # Loan Status by Education and Credit History
-        st.subheader("Loan Status by Education and Credit History")
-        fig = px.sunburst(data, path=['Education', 'Credit_History', 'Loan_Status'],
-                          title="Loan Status by Education and Credit History",
-                          color='Loan_Status', color_discrete_sequence=px.colors.sequential.RdBu)
-        st.plotly_chart(fig)
-if __name__ == "__main__":
     main()

+import streamlit as st
+import pandas as pd
+import numpy as np
+import plotly.express as px
+import plotly.graph_objects as go
+from sklearn.model_selection import train_test_split
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.preprocessing import StandardScaler
+@st.cache_data
+def load_and_preprocess_data():
+    """Load ``train.csv`` and return it with gaps filled and skewed columns log-scaled.
+
+    Steps, in order:
+      * missing values in the discrete columns are replaced by the column mode,
+        missing ``LoanAmount`` values by the column median;
+      * ``Dependents`` is turned into an integer column ('3+' becomes 3);
+      * ``LoanAmount``, ``ApplicantIncome`` and ``CoapplicantIncome`` are
+        replaced by ``log1p`` of their values.
+
+    Returns:
+        The preprocessed ``pandas.DataFrame``.
+    """
+    data = pd.read_csv('train.csv')
+    # Assign each filled column back instead of calling fillna(inplace=True) on
+    # the selected column: the in-place form works on an intermediate object and
+    # is not guaranteed to update `data` (it is a no-op under pandas
+    # Copy-on-Write), which would leave NaNs behind and break astype(int) below.
+    mode_filled = ('Gender', 'Married', 'Dependents', 'Self_Employed',
+                   'Loan_Amount_Term', 'Credit_History')
+    for column in mode_filled:
+        data[column] = data[column].fillna(data[column].mode()[0])
+    data['LoanAmount'] = data['LoanAmount'].fillna(data['LoanAmount'].median())
+    data['Dependents'] = data['Dependents'].replace('3+', '3').astype(int)
+    for column in ('LoanAmount', 'ApplicantIncome', 'CoapplicantIncome'):
+        data[column] = np.log1p(data[column])
+    return data
+@st.cache_resource
+def get_model(data):
+    """Fit a feature scaler and a random-forest classifier on the loan table.
+
+    Args:
+        data: DataFrame holding the ``Loan_ID`` and ``Loan_Status`` columns plus
+            the feature columns.
+
+    Returns:
+        Tuple ``(model, scaler, feature_names)``: the fitted
+        ``RandomForestClassifier``, the fitted ``StandardScaler`` and the list
+        of one-hot encoded column names the two were fitted on.
+    """
+    target = data['Loan_Status']
+    # One-hot encode the non-numeric columns, dropping the first level of each.
+    features = pd.get_dummies(data.drop(columns=['Loan_ID', 'Loan_Status']), drop_first=True)
+    feature_names = list(features.columns)
+    # 80/20 split with a fixed seed; only the training part is used for fitting.
+    train_X, holdout_X, train_y, _holdout_y = train_test_split(
+        features, target, test_size=0.2, random_state=42
+    )
+    scaler = StandardScaler().fit(train_X)
+    train_scaled = scaler.transform(train_X)
+    # The held-out rows are scaled but not used any further in this function.
+    _holdout_scaled = scaler.transform(holdout_X)
+    model = RandomForestClassifier(n_estimators=100, random_state=42)
+    model.fit(train_scaled, train_y)
+    return model, scaler, feature_names
+def predict_loan_approval(model, scaler, feature_names, input_data):
+    """Score a single loan application.
+
+    Args:
+        model: Fitted classifier exposing ``predict_proba`` (and ``classes_``).
+        scaler: Fitted scaler exposing ``transform``.
+        feature_names: Encoded column names, in the order the scaler and model
+            were fitted on.
+        input_data: Mapping of raw column name to value for one applicant.
+            String values are treated as categories, everything else as numeric.
+
+    Returns:
+        Tuple ``(prediction, probability)`` where ``probability`` is the model's
+        probability for class 'Y' and ``prediction`` is 'Y' when that
+        probability is at least 0.5, otherwise 'N'.
+    """
+    row = dict(input_data)
+    # Dependents may arrive as the form's string ('0'..'3+'); convert it to the
+    # integer form ('3+' -> 3) so it fills the numeric 'Dependents' feature.
+    dependents = row.get('Dependents')
+    if isinstance(dependents, str):
+        row['Dependents'] = int(dependents.rstrip('+'))
+    # Build the indicator columns by name. pd.get_dummies(drop_first=True) on a
+    # one-row frame drops the only level of every categorical column, which
+    # would zero out every categorical feature.
+    encoded = {}
+    for column, value in row.items():
+        if isinstance(value, str):
+            encoded[f"{column}_{value}"] = 1
+        else:
+            encoded[column] = value
+    # Reindex drops indicators the model never saw (e.g. the dropped first
+    # level) and zero-fills the ones that do not apply to this applicant.
+    input_df = pd.DataFrame([encoded]).reindex(columns=feature_names, fill_value=0)
+    input_scaled = scaler.transform(input_df)
+    # Look the approval column up by label rather than assuming it is column 1.
+    classes = list(getattr(model, 'classes_', ()))
+    approved_index = classes.index('Y') if 'Y' in classes else 1
+    # Report the probability exactly as the model produced it (no artificial floor).
+    probability = float(model.predict_proba(input_scaled)[0][approved_index])
+    prediction = 'Y' if probability >= 0.5 else 'N'
+    return prediction, probability
+# Streamlit app
+def main():
+    """Render the Streamlit app: a prediction form and a data-exploration page.
+
+    The sidebar radio selects the page. "Predict" collects applicant details,
+    calls ``predict_loan_approval`` and shows the verdict with a gauge;
+    "Explore Data" shows a table preview and several Plotly charts built from
+    the preprocessed dataset.
+    """
+    st.set_page_config(page_title="Loan Approval Predictor", layout="wide")
+    # Sidebar
+    st.sidebar.title("Navigation")
+    page = st.sidebar.radio("Go to", ["Predict", "Explore Data"])
+    # Load data and model
+    data = load_and_preprocess_data()
+    model, scaler, feature_names = get_model(data)
+    if page == "Predict":
+        st.title("Loan Approval Predictor")
+        st.write("Fill in the details below to predict your loan approval chances.")
+        col1, col2, col3 = st.columns(3)
+        with col1:
+            gender = st.selectbox("Gender", ["Male", "Female"])
+            married = st.selectbox("Married", ["Yes", "No"])
+            dependents = st.selectbox("Dependents", ["0", "1", "2", "3+"])
+            education = st.selectbox("Education", ["Graduate", "Not Graduate"])
+        with col2:
+            self_employed = st.selectbox("Self Employed", ["Yes", "No"])
+            applicant_income = st.number_input("Applicant Income", min_value=0)
+            coapplicant_income = st.number_input("Coapplicant Income", min_value=0)
+            loan_amount = st.number_input("Loan Amount", min_value=0)
+        with col3:
+            loan_amount_term = st.number_input("Loan Amount Term (in months)", min_value=0)
+            credit_history = st.selectbox("Credit History", [0, 1])
+            property_area = st.selectbox("Property Area", ["Urban", "Semiurban", "Rural"])
+        if st.button("Predict"):
+            input_data = {
+                'Gender': gender,
+                'Married': married,
+                # The training data stores Dependents as an integer ('3+' -> 3),
+                # so pass the same numeric form rather than the raw label.
+                'Dependents': int(dependents.rstrip('+')),
+                'Education': education,
+                'Self_Employed': self_employed,
+                # Monetary inputs are log1p-transformed exactly like the training columns.
+                'ApplicantIncome': np.log1p(applicant_income),
+                'CoapplicantIncome': np.log1p(coapplicant_income),
+                'LoanAmount': np.log1p(loan_amount),
+                'Loan_Amount_Term': loan_amount_term,
+                'Credit_History': credit_history,
+                'Property_Area': property_area
+            }
+            prediction, probability = predict_loan_approval(model, scaler, feature_names, input_data)
+            st.subheader("Prediction Result")
+            if prediction == 'Y':
+                st.success(f"Congratulations! Your loan is likely to be approved with a {probability:.2%} chance.")
+            else:
+                st.error(f"Sorry, your loan is likely to be rejected. The approval chance is {probability:.2%}.")
+            # Visualization of prediction probability
+            fig = go.Figure(go.Indicator(
+                mode = "gauge+number",
+                value = probability * 100,
+                domain = {'x': [0, 1], 'y': [0, 1]},
+                title = {'text': "Approval Probability"},
+                gauge = {
+                    'axis': {'range': [0, 100]},
+                    'bar': {'color': "darkblue"},
+                    'steps': [
+                        {'range': [0, 30], 'color': "lightgray"},
+                        {'range': [30, 70], 'color': "gray"},
+                        {'range': [70, 100], 'color': "darkgray"}
+                    ],
+                    'threshold': {
+                        'line': {'color': "red", 'width': 4},
+                        'thickness': 0.75,
+                        # Red marker sits on the 50% approve/reject cut used for the verdict.
+                        'value': 50
+                    }
+                }
+            ))
+            st.plotly_chart(fig)
+    elif page == "Explore Data":
+        st.title("Explore Loan Application Data")
+        # Data overview
+        st.subheader("Data Overview")
+        st.write(data.head())
+        st.write(f"Total number of records: {len(data)}")
+        # Loan Status Distribution
+        st.subheader("Loan Status Distribution")
+        fig = px.pie(data, names='Loan_Status', title='Loan Status Distribution', hole=0.3,
+                     color_discrete_sequence=px.colors.sequential.RdBu)
+        st.plotly_chart(fig)
+        # Correlation Heatmap (numeric columns only)
+        st.subheader("Correlation Heatmap")
+        numeric_cols = data.select_dtypes(include=[np.number]).columns
+        corr_matrix = data[numeric_cols].corr()
+        fig = px.imshow(corr_matrix, text_auto=True, aspect="auto", color_continuous_scale='RdBu')
+        st.plotly_chart(fig)
+        # Loan Amount Distribution
+        st.subheader("Loan Amount Distribution")
+        fig = px.histogram(data, x="LoanAmount", nbins=50, title="Loan Amount Distribution",
+                           color="Loan_Status", color_discrete_sequence=px.colors.sequential.RdBu)
+        st.plotly_chart(fig)
+        # Applicant Income vs Loan Amount
+        st.subheader("Applicant Income vs Loan Amount")
+        fig = px.scatter(data, x="ApplicantIncome", y="LoanAmount", color="Loan_Status",
+                         title="Applicant Income vs Loan Amount",
+                         color_discrete_sequence=px.colors.sequential.RdBu)
+        st.plotly_chart(fig)
+        # Loan Status by Education and Credit History
+        st.subheader("Loan Status by Education and Credit History")
+        fig = px.sunburst(data, path=['Education', 'Credit_History', 'Loan_Status'],
+                          title="Loan Status by Education and Credit History",
+                          color='Loan_Status', color_discrete_sequence=px.colors.sequential.RdBu)
+        st.plotly_chart(fig)
+if __name__ == "__main__":
     main()