Spaces:
Runtime error
Runtime error
nurindahpratiwi
committed on
Commit
·
fc524a4
1
Parent(s):
7fa80d3
update
Browse files
app.py
CHANGED
@@ -1,21 +1,24 @@
|
|
1 |
-
import streamlit as st
|
2 |
import pandas as pd
|
3 |
-
import
|
4 |
-
import
|
|
|
5 |
from huggingface_hub import hf_hub_download
|
6 |
-
import
|
7 |
-
import base64
|
8 |
-
from transformers import AutoTokenizer
|
9 |
-
|
10 |
-
# Load the pre-trained numerical imputer, scaler, and model using joblib
|
11 |
|
12 |
-
REPO_ID = "
|
13 |
-
tokenizer.push_to_hub(REPO_ID)
|
14 |
|
15 |
num_imputer = joblib.load(
|
16 |
hf_hub_download(repo_id=REPO_ID, filename="numerical_imputer.joblib")
|
17 |
)
|
18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
scaler = joblib.load(
|
20 |
hf_hub_download(repo_id=REPO_ID, filename="scaler.joblib")
|
21 |
)
|
@@ -24,145 +27,118 @@ model = joblib.load(
|
|
24 |
hf_hub_download(repo_id=REPO_ID, filename="Final_model.joblib")
|
25 |
)
|
26 |
|
27 |
-
#
|
28 |
-
def
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
#
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
st.subheader('Feature Importance')
|
144 |
-
fig, ax = plt.subplots()
|
145 |
-
bars = ax.bar(importance_df['Feature'], importance_df['Importance'])
|
146 |
-
ax.set_xlabel('Feature')
|
147 |
-
ax.set_ylabel('Importance')
|
148 |
-
ax.set_title('Feature Importance')
|
149 |
-
ax.tick_params(axis='x', rotation=45)
|
150 |
-
|
151 |
-
# Add data labels to the bars
|
152 |
-
for bar in bars:
|
153 |
-
height = bar.get_height()
|
154 |
-
ax.annotate(f'{height:.2f}', xy=(bar.get_x() + bar.get_width() / 2, height),
|
155 |
-
xytext=(0, 3), # 3 points vertical offset
|
156 |
-
textcoords="offset points",
|
157 |
-
ha='center', va='bottom')
|
158 |
-
st.pyplot(fig)
|
159 |
-
|
160 |
-
else:
|
161 |
-
st.write('Feature importance is not available for this model.')
|
162 |
-
|
163 |
-
#st.subheader('Sepsis Explanation')
|
164 |
-
#st.markdown(f"{status_icon} {sepsis_explanation}")
|
165 |
-
|
166 |
-
|
167 |
-
if __name__ == '__main__':
|
168 |
-
main()
|
|
|
|
|
1 |
import pandas as pd
|
2 |
+
from transformers import pipeline
|
3 |
+
import streamlit as st
|
4 |
+
import datetime
|
5 |
from huggingface_hub import hf_hub_download
|
6 |
+
import joblib
|
|
|
|
|
|
|
|
|
7 |
|
8 |
+
# Hugging Face Hub repository that stores the fitted preprocessing artifacts.
REPO_ID = "AlbieCofie/predict-customer-churn"


def _load_artifact(filename):
    """Download *filename* from REPO_ID and deserialize it with joblib."""
    local_path = hf_hub_download(repo_id=REPO_ID, filename=filename)
    return joblib.load(local_path)


# Fitted preprocessing steps, loaded in the same order as before.
num_imputer = _load_artifact("numerical_imputer.joblib")
cat_imputer = _load_artifact("categorical_imputer.joblib")
encoder = _load_artifact("encoder.joblib")
scaler = _load_artifact("scaler.joblib")
|
|
|
27 |
hf_hub_download(repo_id=REPO_ID, filename="Final_model.joblib")
|
28 |
)
|
29 |
|
30 |
+
# Create a function that applies the ML pipeline and makes predictions
|
31 |
+
def predict(gender,SeniorCitizen,Partner,Dependents, tenure, PhoneService,MultipleLines,
            InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,
            Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges):
    """Run one customer record through the fitted pipeline and describe the outcome.

    The record is imputed, one-hot encoded (categorical fields), scaled
    (numerical fields), reordered into the column layout listed below and
    passed to ``model.predict``.

    Args:
        gender: Customer gender; capitalised before encoding so that 'male'
            and 'Male' are treated alike.
        SeniorCitizen: 0/1 flag; may be given as an int or as the string
            '0'/'1' and is always converted to int.
        tenure, MonthlyCharges, TotalCharges: Numerical fields.
        All remaining parameters: Categorical fields passed through unchanged.

    Returns:
        A human-readable message saying whether the customer is likely to churn.

    Raises:
        ValueError: If ``SeniorCitizen`` cannot be converted to an integer.
    """
    # Column layout handed to the model: raw numerical columns first, then the
    # one-hot encoded categorical columns.
    model_columns = [
        'SeniorCitizen', 'tenure', 'MonthlyCharges', 'TotalCharges',
        'gender_Female', 'gender_Male', 'Partner_No', 'Partner_Yes',
        'Dependents_No', 'Dependents_Yes', 'PhoneService_No', 'PhoneService_Yes',
        'MultipleLines_No', 'MultipleLines_Yes',
        'InternetService_DSL', 'InternetService_Fiber optic', 'InternetService_No',
        'OnlineSecurity_No', 'OnlineSecurity_Yes', 'OnlineBackup_No', 'OnlineBackup_Yes',
        'DeviceProtection_No', 'DeviceProtection_Yes', 'TechSupport_No', 'TechSupport_Yes',
        'StreamingTV_No', 'StreamingTV_Yes', 'StreamingMovies_No', 'StreamingMovies_Yes',
        'Contract_Month-to-month', 'Contract_One year', 'Contract_Two year',
        'PaperlessBilling_No', 'PaperlessBilling_Yes',
        'PaymentMethod_Bank transfer (automatic)', 'PaymentMethod_Credit card (automatic)',
        'PaymentMethod_Electronic check', 'PaymentMethod_Mailed check',
    ]

    # Create a dataframe with the input data.
    input_df = pd.DataFrame({
        # The encoded columns are 'gender_Female'/'gender_Male', so a lowercase
        # value coming from the form would not match either category.
        'gender': [str(gender).capitalize()],
        # A string '0'/'1' would make this an object column and push it through
        # the categorical branch, while model_columns expects it as a number.
        'SeniorCitizen': [int(SeniorCitizen)],
        'Partner': [Partner],
        'Dependents': [Dependents],
        'tenure': [tenure],
        'PhoneService': [PhoneService],
        'MultipleLines': [MultipleLines],
        'InternetService': [InternetService],
        'OnlineSecurity': [OnlineSecurity],
        'OnlineBackup': [OnlineBackup],
        'DeviceProtection': [DeviceProtection],
        'TechSupport': [TechSupport],
        'StreamingTV': [StreamingTV],
        'StreamingMovies': [StreamingMovies],
        'Contract': [Contract],
        'PaperlessBilling': [PaperlessBilling],
        'PaymentMethod': [PaymentMethod],
        'MonthlyCharges': [MonthlyCharges],
        'TotalCharges': [TotalCharges]
    })

    # Split by "numeric or not" rather than by comparing against the 'object'
    # dtype, so text columns are still categorical if pandas stores them with
    # a dedicated string dtype.
    num_columns = list(input_df.select_dtypes(include='number').columns)
    cat_columns = [col for col in input_df.columns if col not in num_columns]

    # Apply the imputers on the input data.
    input_df_imputed_cat = cat_imputer.transform(input_df[cat_columns])
    input_df_imputed_num = num_imputer.transform(input_df[num_columns])

    # Encode the categorical columns; densify only when the encoder returned a
    # sparse matrix.
    encoded = encoder.transform(input_df_imputed_cat)
    if hasattr(encoded, "toarray"):
        encoded = encoded.toarray()
    input_encoded_df = pd.DataFrame(encoded, columns=encoder.get_feature_names_out(cat_columns))

    # Scale the numerical columns.
    input_scaled_df = pd.DataFrame(scaler.transform(input_df_imputed_num), columns=num_columns)

    # Join the encoded and scaled parts, then put the columns in model order.
    final_df = pd.concat([input_encoded_df, input_scaled_df], axis=1)
    final_df = final_df.reindex(columns=model_columns)

    # Make the prediction for the single row.
    label = model.predict(final_df)[0]
    if hasattr(label, "item"):
        # NumPy scalars expose .item(); unwrap to a plain Python value.
        label = label.item()

    # NOTE(review): the label encoding of Final_model.joblib is not visible
    # here. "Yes" is what this code has always compared against; 1 is accepted
    # as well in case the target was encoded numerically - confirm.
    will_churn = label == "Yes" or label == 1

    if will_churn:
        prediction_label = "Beware!!! This customer is likely to Churn"
    else:
        prediction_label = "This customer is Not likely churn"

    return prediction_label
|
103 |
+
|
104 |
+
#return predictions
|
105 |
+
|
106 |
+
|
107 |
+
# Seed the "Predict was pressed" flag on the first run of a session only;
# later reruns keep whatever value is already stored.
if "clicked" not in st.session_state:
    st.session_state["clicked"] = False
|
109 |
+
|
110 |
+
def click_button():
    """Record in the session state that the Predict button was pressed."""
    st.session_state["clicked"] = True
|
112 |
+
|
113 |
+
|
114 |
+
st.title("CUSTOMER CHURN PREDICTION APP")

# All inputs are grouped in one form and submitted together with "Predict".
with st.form(key="customer-information"):
    st.markdown("This app predicts whether a customer will leave your company or not. Enter the details of the customer below to see the result")
    # Capitalised options match the encoder's 'gender_Female'/'gender_Male' columns.
    gender = st.radio('Select your gender', ('Male', 'Female'))
    # Integer options keep SeniorCitizen a numerical column instead of a string one.
    SeniorCitizen = st.radio("Are you a Seniorcitizen; No=0 and Yes=1", (0, 1))
    Partner = st.radio('Do you have Partner', ('Yes', 'No'))
    Dependents = st.selectbox('Do you have any Dependents?', ('No', 'Yes'))
    tenure = st.number_input('Lenght of tenure (no. of months with Telco)', min_value=0, max_value=90, value=1, step=1)
    PhoneService = st.radio('Do you have PhoneService? ', ('No', 'Yes'))
    MultipleLines = st.radio('Do you have MultipleLines', ('No', 'Yes'))
    InternetService = st.radio('Do you have InternetService', ('DSL', 'Fiber optic', 'No'))
    OnlineSecurity = st.radio('Do you have OnlineSecurity?', ('No', 'Yes'))
    OnlineBackup = st.radio('Do you have OnlineBackup?', ('No', 'Yes'))
    DeviceProtection = st.radio('Do you have DeviceProtection?', ('No', 'Yes'))
    TechSupport = st.radio('Do you have TechSupport?', ('No', 'Yes'))
    StreamingTV = st.radio('Do you have StreamingTV?', ('No', 'Yes'))
    StreamingMovies = st.radio('Do you have StreamingMovies?', ('No', 'Yes'))
    Contract = st.selectbox('which Contract do you use?', ('Month-to-month', 'One year', 'Two year'))
    PaperlessBilling = st.radio('Do you prefer PaperlessBilling?', ('Yes', 'No'))
    PaymentMethod = st.selectbox(
        'Which PaymentMethod do you prefer?',
        ('Electronic check', 'Mailed check', 'Bank transfer (automatic)', 'Credit card (automatic)'),
    )
    # Charges cannot be negative, so the inputs refuse values below zero.
    MonthlyCharges = st.number_input("Enter monthly charges (the range should between 0-120)", min_value=0.0)
    TotalCharges = st.number_input("Enter total charges (the range should between 0-10.000)", min_value=0.0)
    st.form_submit_button('Predict', on_click=click_button)

if st.session_state.clicked:
    # The flag stays set after the first submit, so the result remains on the
    # page across reruns.
    prediction_message = predict(
        gender, SeniorCitizen, Partner, Dependents, tenure, PhoneService, MultipleLines,
        InternetService, OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport,
        StreamingTV, StreamingMovies, Contract, PaperlessBilling, PaymentMethod,
        MonthlyCharges, TotalCharges,
    )
    # Show the outcome; previously the returned message was silently dropped.
    st.write(prediction_message)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app_.py
DELETED
@@ -1,144 +0,0 @@
|
|
1 |
-
import pandas as pd
|
2 |
-
from transformers import pipeline
|
3 |
-
import streamlit as st
|
4 |
-
import datetime
|
5 |
-
from huggingface_hub import hf_hub_download
|
6 |
-
import joblib
|
7 |
-
|
8 |
-
REPO_ID = "AlbieCofie/predict-customer-churn"
|
9 |
-
FILENAME = "sklearn_model.joblib"
|
10 |
-
|
11 |
-
num_imputer = joblib.load(
|
12 |
-
hf_hub_download(repo_id=REPO_ID, filename="numerical_imputer.joblib")
|
13 |
-
)
|
14 |
-
|
15 |
-
cat_imputer = joblib.load(
|
16 |
-
hf_hub_download(repo_id=REPO_ID, filename="categorical_imputer.joblib")
|
17 |
-
)
|
18 |
-
|
19 |
-
encoder = joblib.load(
|
20 |
-
hf_hub_download(repo_id=REPO_ID, filename="encoder.joblib")
|
21 |
-
)
|
22 |
-
|
23 |
-
scaler = joblib.load(
|
24 |
-
hf_hub_download(repo_id=REPO_ID, filename="scaler.joblib")
|
25 |
-
)
|
26 |
-
|
27 |
-
model = joblib.load(
|
28 |
-
hf_hub_download(repo_id=REPO_ID, filename="Final_model.joblib")
|
29 |
-
)
|
30 |
-
|
31 |
-
# Create a function that applies the ML pipeline and makes predictions
|
32 |
-
def predict(gender,SeniorCitizen,Partner,Dependents, tenure, PhoneService,MultipleLines,
|
33 |
-
InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,
|
34 |
-
Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges):
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
# Create a dataframe with the input data
|
39 |
-
input_df = pd.DataFrame({
|
40 |
-
'gender': [gender],
|
41 |
-
'SeniorCitizen': [SeniorCitizen],
|
42 |
-
'Partner': [Partner],
|
43 |
-
'Dependents': [Dependents],
|
44 |
-
'tenure': [tenure],
|
45 |
-
'PhoneService': [PhoneService],
|
46 |
-
'MultipleLines': [MultipleLines],
|
47 |
-
'InternetService': [InternetService],
|
48 |
-
'OnlineSecurity': [OnlineSecurity],
|
49 |
-
'OnlineBackup': [OnlineBackup],
|
50 |
-
'DeviceProtection': [DeviceProtection],
|
51 |
-
'TechSupport': [TechSupport],
|
52 |
-
'StreamingTV': [StreamingTV],
|
53 |
-
'StreamingMovies': [StreamingMovies],
|
54 |
-
'Contract': [Contract],
|
55 |
-
'PaperlessBilling': [PaperlessBilling],
|
56 |
-
'PaymentMethod': [PaymentMethod],
|
57 |
-
'MonthlyCharges': [MonthlyCharges],
|
58 |
-
'TotalCharges': [TotalCharges]
|
59 |
-
})
|
60 |
-
|
61 |
-
# Selecting categorical and numerical columns separately
|
62 |
-
cat_columns = [col for col in input_df.columns if input_df[col].dtype == 'object']
|
63 |
-
num_columns = [col for col in input_df.columns if input_df[col].dtype != 'object']
|
64 |
-
|
65 |
-
# Apply the imputers on the input data
|
66 |
-
input_df_imputed_cat = cat_imputer.transform(input_df[cat_columns])
|
67 |
-
input_df_imputed_num = num_imputer.transform(input_df[num_columns])
|
68 |
-
|
69 |
-
# Encode the categorical columns
|
70 |
-
input_encoded_df = pd.DataFrame(encoder.transform(input_df_imputed_cat).toarray(),
|
71 |
-
columns=encoder.get_feature_names_out(cat_columns))
|
72 |
-
|
73 |
-
# Scale the numerical columns
|
74 |
-
input_df_scaled = scaler.transform(input_df_imputed_num)
|
75 |
-
input_scaled_df = pd.DataFrame(input_df_scaled , columns = num_columns)
|
76 |
-
|
77 |
-
|
78 |
-
#joining the cat encoded and num scaled
|
79 |
-
final_df = pd.concat([input_encoded_df, input_scaled_df], axis=1)
|
80 |
-
|
81 |
-
final_df = final_df.reindex(columns=['SeniorCitizen','tenure','MonthlyCharges','TotalCharges',
|
82 |
-
'gender_Female','gender_Male','Partner_No','Partner_Yes','Dependents_No','Dependents_Yes','PhoneService_No',
|
83 |
-
'PhoneService_Yes','MultipleLines_No','MultipleLines_Yes','InternetService_DSL','InternetService_Fiber optic',
|
84 |
-
'InternetService_No','OnlineSecurity_No','OnlineSecurity_Yes','OnlineBackup_No','OnlineBackup_Yes','DeviceProtection_No',
|
85 |
-
'DeviceProtection_Yes','TechSupport_No','TechSupport_Yes','StreamingTV_No','StreamingTV_Yes','StreamingMovies_No',
|
86 |
-
'StreamingMovies_Yes','Contract_Month-to-month','Contract_One year','Contract_Two year','PaperlessBilling_No',
|
87 |
-
'PaperlessBilling_Yes','PaymentMethod_Bank transfer (automatic)','PaymentMethod_Credit card (automatic)','PaymentMethod_Electronic check',
|
88 |
-
'PaymentMethod_Mailed check'])
|
89 |
-
|
90 |
-
# Make predictions using the model
|
91 |
-
predictions = model.predict(final_df)
|
92 |
-
|
93 |
-
# Make predictions using the model
|
94 |
-
#predictions = model.predict(final_df)
|
95 |
-
|
96 |
-
# Convert the numpy array to an integer
|
97 |
-
#prediction_label = int(predictions.item())
|
98 |
-
|
99 |
-
prediction_label = "Beware!!! This customer is likely to Churn" if predictions.item() == "Yes" else "This customer is Not likely churn"
|
100 |
-
|
101 |
-
|
102 |
-
return prediction_label
|
103 |
-
|
104 |
-
#return predictions
|
105 |
-
|
106 |
-
|
107 |
-
if 'clicked' not in st.session_state:
|
108 |
-
st.session_state.clicked = False
|
109 |
-
|
110 |
-
def click_button():
|
111 |
-
st.session_state.clicked = True
|
112 |
-
|
113 |
-
|
114 |
-
st.title("CUSTOMER CHURN PREDICTION APP")
|
115 |
-
|
116 |
-
with st.form(key="customer-information"):
|
117 |
-
st.markdown("This app predicts whether a customer will leave your company or not. Enter the details of the customer below to see the result")
|
118 |
-
gender = st.radio('Select your gender', ('male', 'female'))
|
119 |
-
SeniorCitizen = st.radio("Are you a Seniorcitizen; No=0 and Yes=1", ('0', '1'))
|
120 |
-
Partner = st.radio('Do you have Partner', ('Yes', 'No'))
|
121 |
-
Dependents = st.selectbox('Do you have any Dependents?', ('No', 'Yes'))
|
122 |
-
tenure = st.number_input('Lenght of tenure (no. of months with Telco)', min_value=0, max_value=90, value=1, step=1)
|
123 |
-
PhoneService = st.radio('Do you have PhoneService? ', ('No', 'Yes'))
|
124 |
-
MultipleLines = st.radio('Do you have MultipleLines', ('No', 'Yes'))
|
125 |
-
InternetService = st.radio('Do you have InternetService', ('DSL', 'Fiber optic', 'No'))
|
126 |
-
OnlineSecurity = st.radio('Do you have OnlineSecurity?', ('No', 'Yes'))
|
127 |
-
OnlineBackup = st.radio('Do you have OnlineBackup?', ('No', 'Yes'))
|
128 |
-
DeviceProtection = st.radio('Do you have DeviceProtection?', ('No', 'Yes'))
|
129 |
-
TechSupport = st.radio('Do you have TechSupport?', ('No', 'Yes'))
|
130 |
-
StreamingTV = st.radio('Do you have StreamingTV?', ('No', 'Yes'))
|
131 |
-
StreamingMovies = st.radio('Do you have StreamingMovies?', ('No', 'Yes'))
|
132 |
-
Contract = st.selectbox('which Contract do you use?', ('Month-to-month', 'One year', 'Two year'))
|
133 |
-
PaperlessBilling = st.radio('Do you prefer PaperlessBilling?', ('Yes', 'No'))
|
134 |
-
PaymentMethod = st.selectbox('Which PaymentMethod do you prefer?', ('Electronic check', 'Mailed check', 'Bank transfer (automatic)',
|
135 |
-
'Credit card (automatic)'))
|
136 |
-
MonthlyCharges = st.number_input("Enter monthly charges (the range should between 0-120)")
|
137 |
-
TotalCharges = st.number_input("Enter total charges (the range should between 0-10.000)")
|
138 |
-
st.form_submit_button('Predict', on_click=click_button)
|
139 |
-
|
140 |
-
if st.session_state.clicked:
|
141 |
-
# The message and nested widget will remain on the page
|
142 |
-
predict(gender,SeniorCitizen,Partner,Dependents, tenure, PhoneService,MultipleLines,
|
143 |
-
InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,
|
144 |
-
Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app_3.py
ADDED
@@ -0,0 +1,167 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import joblib
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
from huggingface_hub import hf_hub_download
|
6 |
+
import time
|
7 |
+
import base64
|
8 |
+
from transformers import AutoTokenizer
|
9 |
+
|
10 |
+
# Load the fitted numerical imputer, scaler and model from the Hugging Face Hub.
# NOTE(review): this app predicts sepsis but the repository is named
# "predict-customer-churn" - confirm these are the intended artifacts.
REPO_ID = "AlbieCofie/predict-customer-churn"


def _load_artifact(filename):
    """Download *filename* from REPO_ID and deserialize it with joblib."""
    local_path = hf_hub_download(repo_id=REPO_ID, filename=filename)
    return joblib.load(local_path)


num_imputer = _load_artifact("numerical_imputer.joblib")
scaler = _load_artifact("scaler.joblib")
model = _load_artifact("Final_model.joblib")
|
25 |
+
|
26 |
+
# Define a function to preprocess the input data
|
27 |
+
def preprocess_input_data(input_data):
    """Impute and scale the numeric columns of the raw patient rows.

    Args:
        input_data: Rows of values in the order PRG, PL, PR, SK, TS, M11,
            BD2, Age, Insurance.

    Returns:
        A DataFrame holding the imputed and scaled numeric columns.
    """
    feature_names = ['PRG', 'PL', 'PR', 'SK', 'TS', 'M11', 'BD2', 'Age', 'Insurance']
    frame = pd.DataFrame(input_data, columns=feature_names)

    # Only numeric columns go through the imputer and the scaler.
    numeric_names = frame.select_dtypes(include='number').columns
    imputed = num_imputer.transform(frame[numeric_names])
    scaled = scaler.transform(imputed)

    return pd.DataFrame(scaled, columns=numeric_names)
|
35 |
+
|
36 |
+
|
37 |
+
# Define a function to make the sepsis prediction
|
38 |
+
def predict_sepsis(input_data):
    """Predict sepsis for the given patient rows and package the result.

    Args:
        input_data: Rows of values in the order PRG, PL, PR, SK, TS, M11,
            BD2, Age, Insurance.

    Returns:
        A tuple ``(output_df, probabilities, status_icon, sepsis_explanation)``:
        the inputs with prediction and probability columns appended, the
        class probabilities of the first row, an icon for the outcome and an
        explanatory sentence.
    """
    input_scaled_df = preprocess_input_data(input_data)
    prediction = model.predict(input_scaled_df)[0]
    probabilities = model.predict_proba(input_scaled_df)[0]

    is_positive = prediction == 1
    sepsis_status = "Positive" if is_positive else "Negative"

    # Red cross mark for a positive prediction, green check mark for a negative
    # one. Written as escapes so the two icons cannot collapse into the same
    # garbled character when the file is re-encoded.
    status_icon = "\u274c" if is_positive else "\u2705"

    if is_positive:
        sepsis_explanation = "Sepsis is a life-threatening condition caused by an infection. A positive prediction suggests that the patient might be exhibiting sepsis symptoms and requires immediate medical attention."
    else:
        sepsis_explanation = "Sepsis is a life-threatening condition caused by an infection. A negative prediction suggests that the patient is not currently exhibiting sepsis symptoms."

    output_df = pd.DataFrame(input_data, columns=['PRG', 'PL', 'PR', 'SK', 'TS', 'M11', 'BD2', 'Age', 'Insurance'])
    output_df['Prediction'] = sepsis_status
    output_df['Negative Probability'] = probabilities[0]
    output_df['Positive Probability'] = probabilities[1]

    return output_df, probabilities, status_icon, sepsis_explanation
|
53 |
+
|
54 |
+
# Create a Streamlit app
|
55 |
+
def main():
    """Render the sepsis prediction page: sidebar inputs, prediction and charts."""
    st.title('Sepsis Prediction App')

    st.image("Strealit_.jpg")

    # How to use
    st.sidebar.title('How to Use')
    st.sidebar.markdown('1. Adjust the input parameters on the left sidebar.')
    st.sidebar.markdown('2. Click the "Predict" button to initiate the prediction.')
    st.sidebar.markdown('3. The app will simulate a prediction process with a progress bar.')
    st.sidebar.markdown('4. Once the prediction is complete, the results will be displayed below.')

    st.sidebar.title('Input Parameters')
    input_data = _collect_inputs()

    if not st.sidebar.button('Predict'):
        return

    with st.spinner("Predicting..."):
        # Simulated progress, as announced in the "How to Use" instructions.
        progress_bar = st.progress(0)
        step = 20  # A big step will reduce the execution time
        for i in range(0, 100, step):
            time.sleep(0.1)
            progress_bar.progress(i + step)

        output_df, probabilities, status_icon, sepsis_explanation = predict_sepsis(input_data)

    st.subheader('Prediction Result')
    # Read the label from the result table instead of comparing icon
    # characters, so the text stays correct whatever the icon looks like.
    prediction_text = output_df['Prediction'].iloc[0]
    st.markdown(f"Prediction: **{prediction_text}**")
    st.markdown(f"{status_icon} {sepsis_explanation}")
    st.write(output_df)

    # Add a download link for output_df, embedded as a base64 data URI.
    csv = output_df.to_csv(index=False)
    b64 = base64.b64encode(csv.encode()).decode()
    href = f'<a href="data:file/csv;base64,{b64}" download="output.csv">Download Output CSV</a>'
    st.markdown(href, unsafe_allow_html=True)

    _plot_probabilities(probabilities)
    _plot_feature_importance()


def _collect_inputs():
    """Draw the sidebar input widgets and return the values as a one-row list."""
    numeric_inputs = (
        ('PRG', '**PRG:** Plasma Glucose'),
        ('PL', '**PL:** Blood Work Result 1'),
        ('PR', '**PR:** Blood Pressure Measured'),
        ('SK', '**SK:** Blood Work Result 2'),
        ('TS', '**TS:** Blood Work Result 3'),
        ('M11', '**M11:** BMI'),
        ('BD2', '**BD2:** Blood Work Result 4'),
        ('Age', '**Age:** What is the Age of the Patient: '),
    )
    values = []
    for name, explanation in numeric_inputs:
        st.sidebar.markdown(explanation)
        values.append(st.sidebar.number_input(name, value=0.0))

    st.sidebar.markdown('**Insurance:** Does the patient have Insurance?')
    insurance_options = {0: 'NO', 1: 'YES'}
    # The radio returns the key (0 or 1); the dict only supplies the label.
    insurance = st.sidebar.radio(
        'Insurance',
        list(insurance_options.keys()),
        format_func=lambda x: insurance_options[x],
    )
    values.append(insurance)

    return [values]


def _plot_probabilities(probabilities):
    """Show the negative/positive class probabilities as a bar chart."""
    fig, ax = plt.subplots()
    ax.bar(['Negative', 'Positive'], probabilities)
    ax.set_xlabel('Sepsis Status')
    ax.set_ylabel('Probability')
    ax.set_title('Sepsis Prediction Probabilities')
    st.pyplot(fig)
    # Figures made with plt.subplots stay registered with pyplot until closed;
    # close it so they do not pile up across script reruns.
    plt.close(fig)


def _plot_feature_importance():
    """Show the model coefficients as a bar chart when the model exposes them."""
    if not hasattr(model, 'coef_'):
        st.write('Feature importance is not available for this model.')
        return

    feature_importances = model.coef_[0]
    feature_names = ['PRG', 'PL', 'PR', 'SK', 'TS', 'M11', 'BD2', 'Age', 'Insurance']

    importance_df = pd.DataFrame({'Feature': feature_names, 'Importance': feature_importances})
    importance_df = importance_df.sort_values('Importance', ascending=False)

    st.subheader('Feature Importance')
    fig, ax = plt.subplots()
    bars = ax.bar(importance_df['Feature'], importance_df['Importance'])
    ax.set_xlabel('Feature')
    ax.set_ylabel('Importance')
    ax.set_title('Feature Importance')
    ax.tick_params(axis='x', rotation=45)

    # Add data labels to the bars
    for bar in bars:
        height = bar.get_height()
        ax.annotate(f'{height:.2f}', xy=(bar.get_x() + bar.get_width() / 2, height),
                    xytext=(0, 3),  # 3 points vertical offset
                    textcoords="offset points",
                    ha='center', va='bottom')
    st.pyplot(fig)
    plt.close(fig)


if __name__ == '__main__':
    main()
|