andrewsunanda committed on
Commit
40db5de
·
1 Parent(s): 416d54c

Upload Model

Browse files
Files changed (7) hide show
  1. app.py +12 -0
  2. churn.csv +0 -0
  3. eda.py +135 -0
  4. final_pipeline.pkl +3 -0
  5. model.h5 +3 -0
  6. prediction.py +90 -0
  7. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import eda
3
+ import prediction
4
+
5
+
6
# Sidebar selector that decides which page of the app gets rendered.
navigation = st.sidebar.selectbox('Pilih Halaman', ('EDA', 'Predict'))

# Any choice other than 'EDA' falls through to the prediction page.
(eda.run if navigation == 'EDA' else prediction.run)()
churn.csv ADDED
The diff for this file is too large to render. See raw diff
 
eda.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import numpy as np
3
+ import pandas as pd
4
+ import seaborn as sns
5
+ import matplotlib.pyplot as plt
6
+ import plotly.express as px
7
+
8
+ st.set_page_config(page_title='Internet Service Provider Customer Churn Dataset Analysis', layout='wide', initial_sidebar_state='expanded')
9
+
10
# Legend captions for churn_risk_score, listed in ascending key order.
# NOTE(review): assumes the column is encoded 0 = no churn, 1 = churn, as the
# original legends imply - confirm against the dataset.
_CHURN_LABELS = ['No Churn', 'Churn']


def _show_count_barplot(frame, group_cols, x, y, hue, title, legend_loc,
                        legend_labels=None, figsize=(10, 8)):
    """Draw a grouped count bar chart and render it in the Streamlit page.

    Rows of ``frame`` are counted per combination of ``group_cols``; the
    resulting ``count`` column is plotted with seaborn, every bar is annotated
    with its value, and the figure is closed after rendering so figures do not
    pile up in matplotlib's global state across reruns.

    Args:
        frame: DataFrame holding the columns named in ``group_cols``.
        group_cols: Columns to group by before counting.
        x: Column (or ``'count'``) placed on the x axis.
        y: Column (or ``'count'``) placed on the y axis.
        hue: Column used to colour/split the bars.
        title: Title of the chart.
        legend_loc: Matplotlib legend location string.
        legend_labels: Optional captions overriding the hue values in the
            legend; when ``None`` the default labels are kept.
        figsize: Figure size in inches.
    """
    counts = frame.groupby(group_cols).size().reset_index(name='count')

    fig = plt.figure(figsize=figsize)
    ax = sns.barplot(x=x, y=y, data=counts, hue=hue)
    ax.set_title(title)
    for container in ax.containers:
        ax.bar_label(container)

    handles, _ = ax.get_legend_handles_labels()
    legend_kwargs = {'handles': handles, 'loc': legend_loc}
    if legend_labels is not None:
        legend_kwargs['labels'] = legend_labels
    ax.legend(**legend_kwargs)

    st.pyplot(fig)
    plt.close(fig)


def run():
    """Render the exploratory-data-analysis page for the churn dataset."""
    # Title
    st.title('EDA on Customer Churn')

    # Description
    st.subheader('Written by Franciscus Andrew Sunanda, FTDS-RMT-018')

    st.markdown('---')

    st.write('Dataset : Internet Service Provider Customer Churn')
    st.write('Objective : To create a model that can predict whether a customers will churn or not')
    st.write('Evaluation Metrics will be using Recall Score to minimize the False Negatives predicted by the model')

    st.markdown('---')

    st.write('## Dataset')
    data = pd.read_csv('churn.csv')
    st.dataframe(data)

    st.markdown('---')

    st.write('## Checking Balance / Imbalance')

    # value_counts() orders by frequency, so the legend captions (which are
    # assigned positionally) could end up swapped. Sorting by index pins the
    # order to 0, 1 - the same order the bar charts below rely on.
    churn = data['churn_risk_score'].value_counts().sort_index()

    fig, ax = plt.subplots(figsize=(10, 8))
    churn.plot(kind='pie',
               ax=ax,
               autopct='%1.1f%%',  # show percentages on the wedges
               labels=None,
               )
    ax.set_title('Churn/No Churn Numbers in this Dataset')
    ax.axis('equal')
    ax.legend(labels=_CHURN_LABELS)
    st.pyplot(fig)
    plt.close(fig)

    st.write('Looks like in this dataset, for classification is already quite balanced between the two possible outcomes')

    st.markdown('---')

    st.write('## Complaints')

    _show_count_barplot(
        data, ['churn_risk_score', 'past_complaint'],
        x='past_complaint', y='count', hue='churn_risk_score',
        title='Churn Risk based on Past Complaint',
        legend_loc='upper right', legend_labels=_CHURN_LABELS,
    )

    # Complaint status is only looked at for customers who actually complained.
    _show_count_barplot(
        data[data['past_complaint'] == 'Yes'], ['churn_risk_score', 'complaint_status'],
        x='complaint_status', y='count', hue='churn_risk_score',
        title='Churn Risk based on Complaint Status',
        legend_loc='upper right', legend_labels=_CHURN_LABELS,
    )

    st.markdown('---')

    st.write('## Feedback')

    _show_count_barplot(
        data, ['churn_risk_score', 'feedback'],
        x='count', y='feedback', hue='churn_risk_score',
        title='Churn Risk based on Feedback',
        legend_loc='lower right', legend_labels=_CHURN_LABELS,
    )

    st.markdown('---')

    st.write('## Membership')

    _show_count_barplot(
        data, ['churn_risk_score', 'membership_category'],
        x='count', y='membership_category', hue='churn_risk_score',
        title="Churn Risk based on user's membership",
        legend_loc='lower right', legend_labels=_CHURN_LABELS,
    )

    st.markdown('---')

    # Header now uses the same '## ' markup as every other section.
    st.write('## Internet Service used by Customers')

    # Title previously said "membership" (copy-paste from the section above).
    _show_count_barplot(
        data, ['internet_option', 'churn_risk_score'],
        x='count', y='internet_option', hue='churn_risk_score',
        title="Churn Risk based on user's internet option",
        legend_loc='lower right', legend_labels=_CHURN_LABELS,
        figsize=(12, 8),
    )

    # This chart plots feedback, not churn, so the title says so; the legend
    # keeps the default (feedback) labels.
    _show_count_barplot(
        data, ['internet_option', 'feedback'],
        x='internet_option', y='count', hue='feedback',
        title="Feedback based on user's internet option",
        legend_loc='lower right',
        figsize=(30, 8),
    )


if __name__ == '__main__':
    run()
final_pipeline.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2318547ccbf56024c7a95d40e388c16313c43d09ac82c5e9a36b969a6d5efb3
3
+ size 3226
model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e34d57008fb2b5fe6398e0b8cf172ae44ac5ce604c4cc51e381727d2f340c39
3
+ size 75976
prediction.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import pickle
5
+ from tensorflow.keras.models import load_model
6
+
7
+
8
+ # Load the Models
9
+
10
+ with open('final_pipeline.pkl', 'rb') as file_1:
11
+ model_pipeline = pickle.load(file_1)
12
+
13
+ model_ann = load_model('model.h5')
14
+
15
def run():
    """Render the churn-prediction page.

    Collects one customer's attributes through a Streamlit form, shows them as
    a single-row DataFrame and, once the form is submitted, transforms the row
    with ``model_pipeline`` and classifies it with ``model_ann`` using a 0.5
    threshold on the network output.
    """
    # Title previously read 'Wine Quality Prediction' (left over from another app).
    st.title('Customer Churn Prediction')

    # The form key is only an internal widget identifier, so it is left as is.
    with st.form(key='form_heart_failure'):
        user = st.text_input('User ID', max_chars=20)
        age = st.number_input('Age', min_value=1, max_value=100, value=25, step=1)
        gender = st.selectbox('Are you a male or female?', ('Male', 'Female'))
        region = st.selectbox('In which region do you live?', ('City', 'Town', 'Village'))
        member = st.selectbox(
            'Your level of membership?',
            ('No Membership', 'Basic Membership', 'Silver Membership',
             'Gold Membership', 'Premium Membership', 'Platinum Membership'),
        )
        join_date = st.text_input('Join date', max_chars=10, help='Please enter with yyyy-mm-dd format')
        referral = st.selectbox('Did you join using referral codes?', ('Yes', 'No'))
        offer = st.selectbox(
            'What is your preferred offer types?',
            ('Gift Vouchers/Coupons', 'Credit/Debit Card Offers', 'Without Offers'),
        )
        medium = st.selectbox('Which device are you using?', ('Desktop', 'Smartphone', 'Both'))
        option = st.selectbox('Which product are you using?', ('Wi-Fi', 'Fiber_Optic', 'Mobile_Data'))
        last_visit = st.text_input('Time during last visit to website', max_chars=8, help='Please enter with hh:mm:ss format')
        days = st.number_input('Days since last login', min_value=0, max_value=365, value=10, step=1)
        tspent = st.number_input('Average Time spent on website', min_value=0., max_value=600., value=30., step=.1)
        transaction_value = st.number_input('Average Transaction Value', min_value=500., max_value=100000., value=15000., step=.1)
        freq = st.number_input('Login Days Frequency', min_value=0, max_value=90, value=10, step=1)
        # Label typo fixed: 'Pints' -> 'Points'.
        point = st.number_input('Points received', min_value=0., max_value=2500., value=600., step=.1)
        discount = st.selectbox('Did you receive special discount?', ('Yes', 'No'))
        preference = st.selectbox('Do you prefer to receive offers?', ('Yes', 'No'))
        past = st.selectbox('Have you ever submitted a complaint?', ('Yes', 'No'))
        # Label typo fixed: 'comlaints' -> 'complaints'.
        status = st.selectbox(
            'What is the outcome of the complaints?',
            ('No Information Available', 'Not Applicable', 'Unsolved', 'Solved', 'Solved in Follow-up'),
            help='Choose Not Applicable if you have never submitted a complaint',
        )
        feedback = st.selectbox(
            'Your feedback for us?',
            ('Poor Website', 'Poor Customer Service', 'Too many ads',
             'Poor Product Quality', 'No reason specified', 'Products always in Stock',
             'Reasonable Price', 'Quality Customer Care', 'User Friendly Website'),
        )

        submitted = st.form_submit_button('Predict')

    # Column names follow the keys used below.
    # NOTE(review): presumably these must match the columns the pickled
    # pipeline was fitted on - verify against the training notebook.
    data_inf = pd.DataFrame([{
        'user_id': user,
        'age': age,
        'gender': gender,
        'region_category': region,
        'membership_category': member,
        'joining_date': join_date,
        'joined_through_referral': referral,
        'preferred_offer_types': offer,
        'medium_of_operation': medium,
        'internet_option': option,
        'last_visit_time': last_visit,
        'days_since_last_login': days,
        'avg_time_spent': tspent,
        'avg_transaction_value': transaction_value,
        'avg_frequency_login_days': freq,
        'points_in_wallet': point,
        'used_special_discount': discount,
        'offer_application_preference': preference,
        'past_complaint': past,
        'complaint_status': status,
        'feedback': feedback,
    }])
    st.dataframe(data_inf)

    # The table above shows the readable gender; the model input uses 'M'/'F'.
    data_inf['gender'] = data_inf['gender'].replace({'Male': 'M', 'Female': 'F'})

    if submitted:
        # Transform the inference row with the fitted preprocessing pipeline.
        data_inf_transform = model_pipeline.transform(data_inf)

        # Predict with the neural network and threshold the output at 0.5.
        y_pred_inf = model_ann.predict(data_inf_transform)
        y_pred_inf = np.where(y_pred_inf >= 0.5, 1, 0)
        y_pred_inf = np.where(y_pred_inf == 0, 'No Churn', 'Churn')
        st.write('Hasil prediksi Model : ', y_pred_inf)


if __name__ == '__main__':
    run()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
streamlit
pandas
seaborn
matplotlib
plotly
numpy
scikit-learn==1.2.1
tensorflow==2.11.0