Spaces:
Sleeping
Sleeping
Commit
·
40db5de
1
Parent(s):
416d54c
Upload Model
Browse files- app.py +12 -0
- churn.csv +0 -0
- eda.py +135 -0
- final_pipeline.pkl +3 -0
- model.h5 +3 -0
- prediction.py +90 -0
- requirements.txt +7 -0
app.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import eda
|
3 |
+
import prediction
|
4 |
+
|
5 |
+
|
6 |
+
# Sidebar selector that decides which page is rendered on this script run.
navigation = st.sidebar.selectbox('Pilih Halaman', ('EDA', 'Predict'))

# 'EDA' maps to the exploration page; any other choice renders the prediction page.
page = eda if navigation == 'EDA' else prediction
page.run()
|
churn.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
eda.py
ADDED
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
import seaborn as sns
|
5 |
+
import matplotlib.pyplot as plt
|
6 |
+
import plotly.express as px
|
7 |
+
|
8 |
+
# Set the page title, a wide layout and an initially expanded sidebar.
# NOTE(review): this is a module-level side effect, so it runs when the module
# is imported rather than when run() is called — confirm this ordering is what
# the Streamlit version in use expects.
st.set_page_config(page_title='Internet Service Provider Customer Churn Dataset Analysis', layout='wide', initial_sidebar_state='expanded')
|
9 |
+
|
10 |
+
def _grouped_counts(frame, columns):
    """Return one row per combination of *columns* with its row total in a 'count' column."""
    return frame.groupby(columns).size().reset_index(name='count')


def _show_count_bars(counts, x, y, hue, title, legend_loc,
                     figsize=(10, 8), legend_labels=('No Churn', 'Churn')):
    """Draw a grouped bar chart of pre-aggregated counts and render it in Streamlit.

    Parameters
    ----------
    counts : pandas.DataFrame
        Aggregated table holding the columns named by ``x``, ``y`` and ``hue``.
    x, y, hue : str
        Column names forwarded to ``sns.barplot``.
    title : str
        Title drawn above the chart.
    legend_loc : str
        Matplotlib legend location.
    figsize : tuple
        Figure size in inches.
    legend_labels : sequence of str or None
        Replacement legend labels; ``None`` keeps the labels produced by seaborn.
    """
    fig = plt.figure(figsize=figsize)
    ax = sns.barplot(x=x, y=y, data=counts, hue=hue)
    plt.title(title)
    # Annotate every bar with its count.
    for container in ax.containers:
        ax.bar_label(container)
    handles, _ = ax.get_legend_handles_labels()
    if legend_labels is None:
        plt.legend(handles=handles, loc=legend_loc)
    else:
        plt.legend(handles=handles, labels=list(legend_labels), loc=legend_loc)
    st.pyplot(fig)
    # Release the figure so repeated script reruns do not accumulate open figures.
    plt.close(fig)


def run():
    """Render the exploratory-data-analysis page for the customer churn dataset.

    Reads ``churn.csv`` from the current working directory, displays the raw
    table, then draws a class-balance pie chart followed by grouped bar charts
    for complaints, feedback, membership and internet option.
    """
    # Page title
    st.title('EDA on Customer Churn')

    # Description
    st.subheader('Written by Franciscus Andrew Sunanda, FTDS-RMT-018')
    st.markdown('---')

    st.write('Dataset : Internet Service Provider Customer Churn')
    st.write('Objective : To create a model that can predict whether a customers will churn or not')
    st.write('Evaluation Metrics will be using Recall Score to minimize the False Negatives predicted by the model')
    st.markdown('---')

    st.write('## Dataset')
    data = pd.read_csv('churn.csv')
    st.dataframe(data)
    st.markdown('---')

    st.write('## Checking Balance / Imbalance')
    # value_counts() orders by frequency, which would swap the hard-coded legend
    # labels whenever class 1 is the majority; sort by class value instead.
    # Assumes churn_risk_score is encoded 0 = no churn, 1 = churn — TODO confirm.
    churn = data['churn_risk_score'].value_counts().sort_index()

    fig, ax = plt.subplots(figsize=(10, 8))
    churn.plot(kind='pie',
               ax=ax,
               autopct='%1.1f%%',  # show each slice's percentage
               labels=None,
               )
    plt.title('Churn/No Churn Numbers in this Dataset')
    plt.axis('equal')
    plt.legend(labels=['No Churn', 'Churn'])
    st.pyplot(fig)
    plt.close(fig)

    st.write('Looks like in this dataset, for classification is already quite balanced between the two possible outcomes')
    st.markdown('---')

    st.write('## Complaints')
    _show_count_bars(
        _grouped_counts(data, ['churn_risk_score', 'past_complaint']),
        x='past_complaint', y='count', hue='churn_risk_score',
        title='Churn Risk based on Past Complaint', legend_loc='upper right',
    )

    # Complaint status is only charted for customers who did file a complaint.
    complained = data[data['past_complaint'] == 'Yes']
    _show_count_bars(
        _grouped_counts(complained, ['churn_risk_score', 'complaint_status']),
        x='complaint_status', y='count', hue='churn_risk_score',
        title='Churn Risk based on Complaint Status', legend_loc='upper right',
    )
    st.markdown('---')

    st.write('## Feedback')
    _show_count_bars(
        _grouped_counts(data, ['churn_risk_score', 'feedback']),
        x='count', y='feedback', hue='churn_risk_score',
        title='Churn Risk based on Feedback', legend_loc='lower right',
    )
    st.markdown('---')

    st.write('## Membership')
    _show_count_bars(
        _grouped_counts(data, ['churn_risk_score', 'membership_category']),
        x='count', y='membership_category', hue='churn_risk_score',
        title="Churn Risk based on user's membership", legend_loc='lower right',
    )
    st.markdown('---')

    st.write('Internet Service used by Customers')
    _show_count_bars(
        _grouped_counts(data, ['internet_option', 'churn_risk_score']),
        x='count', y='internet_option', hue='churn_risk_score',
        # Title previously repeated the membership chart's title.
        title="Churn Risk based on user's internet option", legend_loc='lower right',
        figsize=(12, 8),
    )

    # This chart is coloured by feedback, so the seaborn-generated legend
    # labels are kept instead of the churn labels.
    _show_count_bars(
        _grouped_counts(data, ['internet_option', 'feedback']),
        x='internet_option', y='count', hue='feedback',
        title="Feedback based on user's internet option", legend_loc='lower right',
        figsize=(30, 8), legend_labels=None,
    )
|
132 |
+
|
133 |
+
|
134 |
+
# Render the EDA page when this module is executed directly instead of imported.
if __name__ == '__main__':
    run()
|
final_pipeline.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f2318547ccbf56024c7a95d40e388c16313c43d09ac82c5e9a36b969a6d5efb3
|
3 |
+
size 3226
|
model.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6e34d57008fb2b5fe6398e0b8cf172ae44ac5ce604c4cc51e381727d2f340c39
|
3 |
+
size 75976
|
prediction.py
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import pickle
|
5 |
+
from tensorflow.keras.models import load_model
|
6 |
+
|
7 |
+
|
8 |
+
# Load the models once, at import time; run() uses model_pipeline.transform()
# and model_ann.predict() on every submitted form.
# NOTE(review): pickle.load can execute arbitrary code while unpickling — only
# deploy a final_pipeline.pkl that comes from a trusted source.
with open('final_pipeline.pkl', 'rb') as file_1:
    model_pipeline = pickle.load(file_1)

# Keras model read from the HDF5 file in the working directory.
model_ann = load_model('model.h5')
|
14 |
+
|
15 |
+
def run():
    """Render the churn prediction form and display the model's prediction.

    Collects one customer's attributes through a Streamlit form, echoes them
    back as a one-row table and, once the form is submitted, passes that row
    through ``model_pipeline.transform`` and ``model_ann.predict``. Outputs of
    0.5 or above are reported as 'Churn', anything lower as 'No Churn'.
    """
    # The title previously read 'Wine Quality Prediction', left over from another app.
    st.title('Customer Churn Prediction')

    with st.form(key='form_heart_failure'):
        user = st.text_input('User ID', max_chars=20)
        age = st.number_input('Age', min_value=1, max_value=100, value=25, step=1)
        gender = st.selectbox('Are you a male or female?', ('Male', 'Female'))
        region = st.selectbox('In which region do you live?', ('City', 'Town', 'Village'))
        member = st.selectbox('Your level of membership?', ('No Membership', 'Basic Membership', 'Silver Membership', 'Gold Membership', 'Premium Membership', 'Platinum Membership'))
        join_date = st.text_input('Join date', max_chars=10, help='Please enter with yyyy-mm-dd format')
        referral = st.selectbox('Did you join using referral codes?', ('Yes', 'No'))
        offer = st.selectbox('What is your preferred offer types?', ('Gift Vouchers/Coupons', 'Credit/Debit Card Offers', 'Without Offers'))
        medium = st.selectbox('Which device are you using?', ('Desktop', 'Smartphone', 'Both'))
        option = st.selectbox('Which product are you using?', ('Wi-Fi', 'Fiber_Optic', 'Mobile_Data'))
        visit_time = st.text_input('Time during last visit to website', max_chars=8, help='Please enter with hh:mm:ss format')
        days = st.number_input('Days since last login', min_value=0, max_value=365, value=10, step=1)
        tspent = st.number_input('Average Time spent on website', min_value=0., max_value=600., value=30., step=.1)
        value = st.number_input('Average Transaction Value', min_value=500., max_value=100000., value=15000., step=.1)
        freq = st.number_input('Login Days Frequency', min_value=0, max_value=90, value=10, step=1)
        # Label typo fixed: 'Pints' -> 'Points'.
        point = st.number_input('Points received', min_value=0., max_value=2500., value=600., step=.1)
        discount = st.selectbox('Did you receive special discount?', ('Yes', 'No'))
        preference = st.selectbox('Do you prefer to receive offers?', ('Yes', 'No'))
        past = st.selectbox('Have you ever submitted a complaint?', ('Yes', 'No'))
        # Label typo fixed: 'comlaints' -> 'complaints'.
        status = st.selectbox('What is the outcome of the complaints?', ('No Information Available', 'Not Applicable', 'Unsolved', 'Solved', 'Solved in Follow-up'), help='Choose Not Applicable if you have never submitted a complaint')
        feedback = st.selectbox('Your feedback for us?', ('Poor Website', 'Poor Customer Service', 'Too many ads', 'Poor Product Quality', 'No reason specified', 'Products always in Stock', 'Reasonable Price', 'Quality Customer Care', 'User Friendly Website'))

        submitted = st.form_submit_button('Predict')

    # One inference record keyed by the column names passed to the pipeline.
    record = {
        'user_id': user,
        'age': age,
        'gender': gender,
        'region_category': region,
        'membership_category': member,
        'joining_date': join_date,
        'joined_through_referral': referral,
        'preferred_offer_types': offer,
        'medium_of_operation': medium,
        'internet_option': option,
        'last_visit_time': visit_time,
        'days_since_last_login': days,
        'avg_time_spent': tspent,
        'avg_transaction_value': value,
        'avg_frequency_login_days': freq,
        'points_in_wallet': point,
        'used_special_discount': discount,
        'offer_application_preference': preference,
        'past_complaint': past,
        'complaint_status': status,
        'feedback': feedback,
    }

    data_inf = pd.DataFrame([record])
    st.dataframe(data_inf)

    # Recode gender after the table is displayed, so the user still sees the
    # full words; presumably the pipeline was fitted on 'M'/'F' — TODO confirm.
    data_inf['gender'] = data_inf['gender'].replace({'Male': 'M', 'Female': 'F'})

    if not submitted:
        return

    # Transform the inference row, then predict with the neural network.
    data_inf_transform = model_pipeline.transform(data_inf)
    y_pred_inf = model_ann.predict(data_inf_transform)
    y_pred_inf = np.where(y_pred_inf >= 0.5, 'Churn', 'No Churn')
    st.write('Hasil prediksi Model : ', y_pred_inf)
|
86 |
+
|
87 |
+
|
88 |
+
|
89 |
+
# Render the prediction page when this module is executed directly instead of imported.
if __name__ == '__main__':
    run()
|
requirements.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
pandas
seaborn
matplotlib
numpy
# eda.py imports plotly.express, so plotly must be installed for the app to start.
plotly
scikit-learn==1.2.1
tensorflow==2.11.0
|