Johan713 committed on
Commit
0b7ab28
·
verified ·
1 Parent(s): c6131e7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +197 -192
app.py CHANGED
@@ -1,193 +1,198 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import numpy as np
4
- import plotly.express as px
5
- import plotly.graph_objects as go
6
- from sklearn.model_selection import train_test_split
7
- from sklearn.ensemble import RandomForestClassifier
8
- from sklearn.preprocessing import StandardScaler
9
-
10
@st.cache_data
def load_and_preprocess_data():
    """Load ``train.csv`` and return a cleaned DataFrame for modelling and plots.

    Steps, in order:
      1. Fill missing values: column mode for the categorical/discrete
         columns, column median for ``LoanAmount``.
      2. Convert ``Dependents`` to integers, mapping the ``'3+'`` bucket to 3.
      3. Apply ``log1p`` to ``LoanAmount``, ``ApplicantIncome`` and
         ``CoapplicantIncome``.

    The result is memoised by the ``st.cache_data`` decorator.

    Returns:
        pandas.DataFrame: the preprocessed data.
    """
    data = pd.read_csv('train.csv')

    # Assign the filled column back instead of calling
    # ``data[col].fillna(..., inplace=True)``: the in-place call operates on a
    # temporary selection, which pandas deprecates and which does not update
    # ``data`` once Copy-on-Write is enabled.
    mode_filled_columns = [
        'Gender',
        'Married',
        'Dependents',
        'Self_Employed',
        'Loan_Amount_Term',
        'Credit_History',
    ]
    for column in mode_filled_columns:
        data[column] = data[column].fillna(data[column].mode()[0])
    data['LoanAmount'] = data['LoanAmount'].fillna(data['LoanAmount'].median())

    # '3+' is the only non-numeric bucket; collapse it to 3 so the column is int.
    data['Dependents'] = data['Dependents'].replace('3+', '3').astype(int)

    # log1p compresses the monetary columns and is defined at 0.
    for column in ('LoanAmount', 'ApplicantIncome', 'CoapplicantIncome'):
        data[column] = np.log1p(data[column])

    return data
29
-
30
@st.cache_resource
def get_model(data):
    """Train the loan-approval classifier on the preprocessed data.

    Args:
        data: DataFrame that contains ``Loan_ID``, ``Loan_Status`` and the
            feature columns.

    Returns:
        tuple: ``(model, scaler, feature_names)`` where ``model`` is the fitted
        ``RandomForestClassifier``, ``scaler`` is the ``StandardScaler`` fitted
        on the training rows, and ``feature_names`` is the list of one-hot
        encoded column names, in the order the model was trained on.
    """
    # One-hot encode the categorical columns; drop_first removes one level per
    # category to avoid redundant columns.
    features = pd.get_dummies(
        data.drop(['Loan_ID', 'Loan_Status'], axis=1), drop_first=True
    )
    target = data['Loan_Status']
    feature_names = features.columns.tolist()

    # Keep the 80/20 split and seed so the model is fitted on the same rows as
    # before; the hold-out part is never used here, so it is discarded instead
    # of being scaled for nothing.
    X_train, _, y_train, _ = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)

    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train_scaled, y_train)

    return model, scaler, feature_names
55
def predict_loan_approval(model, scaler, feature_names, input_data):
    """Predict the loan decision for a single applicant.

    Args:
        model: fitted classifier exposing ``predict`` and ``predict_proba``.
        scaler: fitted transformer exposing ``transform``.
        feature_names: one-hot encoded column names, in training order.
        input_data: mapping of raw field name to value for one applicant.

    Returns:
        tuple: ``(prediction, probability)`` where ``prediction`` is the
        model's predicted label and ``probability`` is the predicted
        probability of the ``'Y'`` (approved) class.
    """
    input_df = pd.DataFrame([input_data])

    # Training data stores Dependents as an integer with '3+' collapsed to 3;
    # accept the raw form value ('3+') as well as an already numeric value.
    if 'Dependents' in input_df.columns:
        input_df['Dependents'] = (
            input_df['Dependents']
            .astype(str)
            .str.replace('+', '', regex=False)
            .astype(float)
            .astype(int)
        )

    # Do NOT pass drop_first=True here: with a single row every categorical
    # column has exactly one level, so drop_first would remove all of them and
    # every categorical feature would silently become 0. Encode every level
    # and let reindex keep exactly the columns seen at training time.
    input_df = pd.get_dummies(input_df)
    input_df = input_df.reindex(columns=feature_names, fill_value=0)

    input_scaled = scaler.transform(input_df)
    prediction = model.predict(input_scaled)

    # Look up the 'Y' column through classes_ rather than assuming index 1;
    # fall back to index 1 when the model does not expose that label.
    classes = list(getattr(model, 'classes_', ()))
    approved_index = classes.index('Y') if 'Y' in classes else 1
    probability = float(model.predict_proba(input_scaled)[0][approved_index])

    return prediction[0], probability
69
-
70
- # Streamlit app
71
def _render_prediction_page(model, scaler, feature_names):
    """Draw the applicant form and, once submitted, the prediction and gauge."""
    st.title("Loan Approval Predictor")
    st.write("Fill in the details below to predict your loan approval chances.")

    col1, col2, col3 = st.columns(3)

    with col1:
        gender = st.selectbox("Gender", ["Male", "Female"])
        married = st.selectbox("Married", ["Yes", "No"])
        dependents = st.selectbox("Dependents", ["0", "1", "2", "3+"])
        education = st.selectbox("Education", ["Graduate", "Not Graduate"])

    with col2:
        self_employed = st.selectbox("Self Employed", ["Yes", "No"])
        applicant_income = st.number_input("Applicant Income", min_value=0)
        coapplicant_income = st.number_input("Coapplicant Income", min_value=0)
        loan_amount = st.number_input("Loan Amount", min_value=0)

    with col3:
        loan_amount_term = st.number_input("Loan Amount Term (in months)", min_value=0)
        credit_history = st.selectbox("Credit History", [0, 1])
        property_area = st.selectbox("Property Area", ["Urban", "Semiurban", "Rural"])

    if not st.button("Predict"):
        return

    input_data = {
        'Gender': gender,
        'Married': married,
        # Training data stores Dependents as an int with '3+' collapsed to 3,
        # so send the same encoding instead of the raw label.
        'Dependents': int(dependents.rstrip('+')),
        'Education': education,
        'Self_Employed': self_employed,
        # The monetary fields are log1p-transformed in the training data.
        'ApplicantIncome': np.log1p(applicant_income),
        'CoapplicantIncome': np.log1p(coapplicant_income),
        'LoanAmount': np.log1p(loan_amount),
        'Loan_Amount_Term': loan_amount_term,
        'Credit_History': credit_history,
        'Property_Area': property_area,
    }

    prediction, probability = predict_loan_approval(model, scaler, feature_names, input_data)

    st.subheader("Prediction Result")
    if prediction == 'Y':
        st.success(f"Congratulations! Your loan is likely to be approved with a {probability:.2%} chance.")
    else:
        st.error(f"Sorry, your loan is likely to be rejected. The approval chance is {probability:.2%}.")

    # Gauge showing the approval probability as a percentage.
    fig = go.Figure(go.Indicator(
        mode="gauge+number",
        value=probability * 100,
        domain={'x': [0, 1], 'y': [0, 1]},
        title={'text': "Approval Probability"},
        gauge={
            'axis': {'range': [0, 100]},
            'bar': {'color': "darkblue"},
            'steps': [
                {'range': [0, 50], 'color': "lightgray"},
                {'range': [50, 75], 'color': "gray"},
                {'range': [75, 100], 'color': "darkgray"},
            ],
            'threshold': {
                'line': {'color': "red", 'width': 4},
                'thickness': 0.75,
                'value': 50,
            },
        },
    ))
    st.plotly_chart(fig)


def _render_explore_page(data):
    """Draw the overview table and the exploratory charts for ``data``."""
    st.title("Explore Loan Application Data")

    # Data overview
    st.subheader("Data Overview")
    st.write(data.head())
    st.write(f"Total number of records: {len(data)}")

    # Loan Status Distribution
    st.subheader("Loan Status Distribution")
    fig = px.pie(data, names='Loan_Status', title='Loan Status Distribution', hole=0.3,
                 color_discrete_sequence=px.colors.sequential.RdBu)
    st.plotly_chart(fig)

    # Correlation Heatmap (numeric columns only)
    st.subheader("Correlation Heatmap")
    numeric_cols = data.select_dtypes(include=[np.number]).columns
    corr_matrix = data[numeric_cols].corr()
    fig = px.imshow(corr_matrix, text_auto=True, aspect="auto", color_continuous_scale='RdBu')
    st.plotly_chart(fig)

    # Loan Amount Distribution
    st.subheader("Loan Amount Distribution")
    fig = px.histogram(data, x="LoanAmount", nbins=50, title="Loan Amount Distribution",
                       color="Loan_Status", color_discrete_sequence=px.colors.sequential.RdBu)
    st.plotly_chart(fig)

    # Applicant Income vs Loan Amount
    st.subheader("Applicant Income vs Loan Amount")
    fig = px.scatter(data, x="ApplicantIncome", y="LoanAmount", color="Loan_Status",
                     title="Applicant Income vs Loan Amount",
                     color_discrete_sequence=px.colors.sequential.RdBu)
    st.plotly_chart(fig)

    # Loan Status by Education and Credit History
    st.subheader("Loan Status by Education and Credit History")
    fig = px.sunburst(data, path=['Education', 'Credit_History', 'Loan_Status'],
                      title="Loan Status by Education and Credit History",
                      color='Loan_Status', color_discrete_sequence=px.colors.sequential.RdBu)
    st.plotly_chart(fig)


def main():
    """Entry point of the Streamlit app: configure the page and route to a view.

    Loads the preprocessed data and the trained model, then renders either the
    prediction form or the data exploration charts, depending on the sidebar
    selection.
    """
    st.set_page_config(page_title="Loan Approval Predictor", layout="wide")

    # Sidebar navigation
    st.sidebar.title("Navigation")
    page = st.sidebar.radio("Go to", ["Predict", "Explore Data"])

    # Load data and model
    data = load_and_preprocess_data()
    model, scaler, feature_names = get_model(data)

    if page == "Predict":
        _render_prediction_page(model, scaler, feature_names)
    elif page == "Explore Data":
        _render_explore_page(data)
191
-
192
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import plotly.express as px
5
+ import plotly.graph_objects as go
6
+ from sklearn.model_selection import train_test_split
7
+ from sklearn.ensemble import RandomForestClassifier
8
+ from sklearn.preprocessing import StandardScaler
9
+
10
@st.cache_data
def load_and_preprocess_data():
    """Read ``train.csv`` and return the cleaned DataFrame used by the app.

    Processing order:
      1. Missing values are filled with the column mode for the
         categorical/discrete columns and with the median for ``LoanAmount``.
      2. ``Dependents`` becomes an integer column (``'3+'`` is mapped to 3).
      3. ``LoanAmount``, ``ApplicantIncome`` and ``CoapplicantIncome`` are
         transformed with ``log1p``.

    The ``st.cache_data`` decorator memoises the returned frame.

    Returns:
        pandas.DataFrame: the preprocessed data.
    """
    data = pd.read_csv('train.csv')

    # ``data[col].fillna(..., inplace=True)`` mutates a temporary selection;
    # pandas deprecates that pattern and, with Copy-on-Write, it leaves
    # ``data`` untouched. Writing the result back is always effective.
    for column in (
        'Gender',
        'Married',
        'Dependents',
        'Self_Employed',
        'Loan_Amount_Term',
        'Credit_History',
    ):
        most_common = data[column].mode()[0]
        data[column] = data[column].fillna(most_common)
    data['LoanAmount'] = data['LoanAmount'].fillna(data['LoanAmount'].median())

    # Collapse the open-ended '3+' bucket so the column can be cast to int.
    data['Dependents'] = data['Dependents'].replace('3+', '3').astype(int)

    # log1p handles zero values and compresses the monetary columns.
    for column in ('LoanAmount', 'ApplicantIncome', 'CoapplicantIncome'):
        data[column] = np.log1p(data[column])

    return data
29
+
30
@st.cache_resource
def get_model(data):
    """Fit the scaler and random-forest classifier used for predictions.

    Args:
        data: preprocessed DataFrame that includes ``Loan_ID`` and
            ``Loan_Status`` next to the feature columns.

    Returns:
        tuple: ``(model, scaler, feature_names)``: the fitted
        ``RandomForestClassifier``, the ``StandardScaler`` fitted on the
        training rows, and the one-hot encoded column names in training order.
    """
    # Separate the target and drop the identifier, then one-hot encode.
    X = pd.get_dummies(data.drop(['Loan_ID', 'Loan_Status'], axis=1), drop_first=True)
    y = data['Loan_Status']

    # Callers need the encoded column order to align new inputs.
    feature_names = X.columns.tolist()

    # The 80/20 split and seed are kept so the fitted model is unchanged; the
    # hold-out rows are not used in this function, so they are not scaled.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2, random_state=42)

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)

    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train_scaled, y_train)

    return model, scaler, feature_names
55
def predict_loan_approval(model, scaler, feature_names, input_data,
                          min_probability=0.3, approval_threshold=0.5):
    """Predict the loan decision for a single applicant.

    Args:
        model: fitted classifier exposing ``predict_proba`` (and ``classes_``).
        scaler: fitted transformer exposing ``transform``.
        feature_names: one-hot encoded column names, in training order.
        input_data: mapping of raw field name to value for one applicant.
        min_probability: lower bound applied to the reported approval
            probability (default 0.3, the previously hard-coded value).
        approval_threshold: reported probability at or above which the result
            is ``'Y'`` (default 0.5, the previously hard-coded value).

    Returns:
        tuple: ``(prediction, probability)`` where ``prediction`` is ``'Y'`` or
        ``'N'`` and ``probability`` is the floored approval probability.
    """
    input_df = pd.DataFrame([input_data])

    # Training data stores Dependents as an integer with '3+' collapsed to 3;
    # accept the raw form value ('3+') as well as an already numeric value.
    if 'Dependents' in input_df.columns:
        input_df['Dependents'] = (
            input_df['Dependents']
            .astype(str)
            .str.replace('+', '', regex=False)
            .astype(float)
            .astype(int)
        )

    # Do NOT pass drop_first=True here: a single row has exactly one level per
    # categorical column, so drop_first would delete every dummy column and
    # all categorical inputs would reach the model as 0. Encode every level
    # and let reindex keep exactly the columns seen at training time.
    input_df = pd.get_dummies(input_df)
    input_df = input_df.reindex(columns=feature_names, fill_value=0)

    input_scaled = scaler.transform(input_df)

    # Look up the 'Y' column through classes_ rather than assuming index 1;
    # fall back to index 1 when the model does not expose that label.
    classes = list(getattr(model, 'classes_', ()))
    approved_index = classes.index('Y') if 'Y' in classes else 1
    probability = float(model.predict_proba(input_scaled)[0][approved_index])

    # NOTE(review): the floor means the reported chance never drops below
    # min_probability even when the model predicts less - confirm that this
    # is the intended presentation.
    adjusted_probability = max(probability, min_probability)
    adjusted_prediction = 'Y' if adjusted_probability >= approval_threshold else 'N'

    return adjusted_prediction, adjusted_probability
74
+
75
+ # Streamlit app
76
def _render_prediction_page(model, scaler, feature_names):
    """Collect the applicant details and display the prediction with a gauge."""
    st.title("Loan Approval Predictor")
    st.write("Fill in the details below to predict your loan approval chances.")

    col1, col2, col3 = st.columns(3)

    with col1:
        gender = st.selectbox("Gender", ["Male", "Female"])
        married = st.selectbox("Married", ["Yes", "No"])
        dependents = st.selectbox("Dependents", ["0", "1", "2", "3+"])
        education = st.selectbox("Education", ["Graduate", "Not Graduate"])

    with col2:
        self_employed = st.selectbox("Self Employed", ["Yes", "No"])
        applicant_income = st.number_input("Applicant Income", min_value=0)
        coapplicant_income = st.number_input("Coapplicant Income", min_value=0)
        loan_amount = st.number_input("Loan Amount", min_value=0)

    with col3:
        loan_amount_term = st.number_input("Loan Amount Term (in months)", min_value=0)
        credit_history = st.selectbox("Credit History", [0, 1])
        property_area = st.selectbox("Property Area", ["Urban", "Semiurban", "Rural"])

    # Nothing else to draw until the form is submitted.
    if not st.button("Predict"):
        return

    input_data = {
        'Gender': gender,
        'Married': married,
        # Training data stores Dependents as an int with '3+' collapsed to 3,
        # so send the same encoding instead of the raw label.
        'Dependents': int(dependents.rstrip('+')),
        'Education': education,
        'Self_Employed': self_employed,
        # The monetary fields are log1p-transformed in the training data.
        'ApplicantIncome': np.log1p(applicant_income),
        'CoapplicantIncome': np.log1p(coapplicant_income),
        'LoanAmount': np.log1p(loan_amount),
        'Loan_Amount_Term': loan_amount_term,
        'Credit_History': credit_history,
        'Property_Area': property_area,
    }

    prediction, probability = predict_loan_approval(model, scaler, feature_names, input_data)

    st.subheader("Prediction Result")
    if prediction == 'Y':
        st.success(f"Congratulations! Your loan is likely to be approved with a {probability:.2%} chance.")
    else:
        st.error(f"Sorry, your loan is likely to be rejected. The approval chance is {probability:.2%}.")

    # Gauge showing the approval probability as a percentage.
    # NOTE(review): the red marker sits at 30 while the 'Y'/'N' decision in
    # predict_loan_approval uses 0.5 - confirm which boundary the marker is
    # meant to show.
    fig = go.Figure(go.Indicator(
        mode="gauge+number",
        value=probability * 100,
        domain={'x': [0, 1], 'y': [0, 1]},
        title={'text': "Approval Probability"},
        gauge={
            'axis': {'range': [0, 100]},
            'bar': {'color': "darkblue"},
            'steps': [
                {'range': [0, 30], 'color': "lightgray"},
                {'range': [30, 70], 'color': "gray"},
                {'range': [70, 100], 'color': "darkgray"},
            ],
            'threshold': {
                'line': {'color': "red", 'width': 4},
                'thickness': 0.75,
                'value': 30,
            },
        },
    ))
    st.plotly_chart(fig)


def _render_explore_page(data):
    """Show the head of ``data`` and the exploratory charts built from it."""
    st.title("Explore Loan Application Data")

    # Data overview
    st.subheader("Data Overview")
    st.write(data.head())
    st.write(f"Total number of records: {len(data)}")

    # Loan Status Distribution
    st.subheader("Loan Status Distribution")
    fig = px.pie(data, names='Loan_Status', title='Loan Status Distribution', hole=0.3,
                 color_discrete_sequence=px.colors.sequential.RdBu)
    st.plotly_chart(fig)

    # Correlation Heatmap (numeric columns only)
    st.subheader("Correlation Heatmap")
    numeric_cols = data.select_dtypes(include=[np.number]).columns
    corr_matrix = data[numeric_cols].corr()
    fig = px.imshow(corr_matrix, text_auto=True, aspect="auto", color_continuous_scale='RdBu')
    st.plotly_chart(fig)

    # Loan Amount Distribution
    st.subheader("Loan Amount Distribution")
    fig = px.histogram(data, x="LoanAmount", nbins=50, title="Loan Amount Distribution",
                       color="Loan_Status", color_discrete_sequence=px.colors.sequential.RdBu)
    st.plotly_chart(fig)

    # Applicant Income vs Loan Amount
    st.subheader("Applicant Income vs Loan Amount")
    fig = px.scatter(data, x="ApplicantIncome", y="LoanAmount", color="Loan_Status",
                     title="Applicant Income vs Loan Amount",
                     color_discrete_sequence=px.colors.sequential.RdBu)
    st.plotly_chart(fig)

    # Loan Status by Education and Credit History
    st.subheader("Loan Status by Education and Credit History")
    fig = px.sunburst(data, path=['Education', 'Credit_History', 'Loan_Status'],
                      title="Loan Status by Education and Credit History",
                      color='Loan_Status', color_discrete_sequence=px.colors.sequential.RdBu)
    st.plotly_chart(fig)


def main():
    """Entry point of the Streamlit app: configure the page and route to a view.

    Loads the preprocessed data and the trained model, then renders either the
    prediction form or the data exploration charts, depending on the sidebar
    selection.
    """
    st.set_page_config(page_title="Loan Approval Predictor", layout="wide")

    # Sidebar navigation
    st.sidebar.title("Navigation")
    page = st.sidebar.radio("Go to", ["Predict", "Explore Data"])

    # Load data and model
    data = load_and_preprocess_data()
    model, scaler, feature_names = get_model(data)

    if page == "Predict":
        _render_prediction_page(model, scaler, feature_names)
    elif page == "Explore Data":
        _render_explore_page(data)
196
+
197
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()