FerdiErs committed
Commit
68d3508
1 Parent(s): ac046aa

commit first try

Files changed (7)
  1. app.py +10 -0
  2. eda.py +125 -0
  3. final_pipeline.pkl +3 -0
  4. model.h5 +3 -0
  5. music.jpg +0 -0
  6. prediction.py +71 -0
  7. requirements.txt +9 -0
app.py ADDED
@@ -0,0 +1,10 @@
+ import streamlit as st
+ import eda
+ import prediction
+
+ navigation = st.sidebar.selectbox('Choose Page : ', ('Description', 'Churn Estimator'))
+
+ if navigation == 'Description':
+     eda.run()
+ else:
+     prediction.run()
eda.py ADDED
@@ -0,0 +1,125 @@
+ import streamlit as st
+ import pandas as pd
+ import seaborn as sns
+ import matplotlib.pyplot as plt
+ from PIL import Image
+
+ st.set_page_config(
+     page_title='Customer Churn Predictor'
+ )
+
+ def run():
+
+     # Page title
+     st.title('Customer Churn Predictor')
+
+     # Sub header
+     st.subheader('Description of the Customer Churn Predictor')
+
+     # Header image
+     image = Image.open('music.jpg')
+     st.image(image, caption='Dengar')
+
+     # Description
+     st.write('The goal of this churn estimator:')
+     st.write('Dengar is a music streaming platform that wants to predict which customers will churn, so it has asked a data scientist to build a model to make that prediction.')
+     st.write('With this model, Dengar is expected to be able to predict whether or not a customer will churn, so it can focus better on its goals.')
+     st.markdown('---')
+
+     st.write('This page shows visualizations of the dataset.')
+
+     st.markdown('---')
+
+     # Show the dataframe
+     st.write('Dataset')
+     dup = pd.read_csv('https://raw.githubusercontent.com/FerdiErs/SQL/main/churn.csv')
+     st.dataframe(dup)
+
+     # Visualization helpers
+
+     def plot_hist(data, title, x_label):
+         # Create a histogram plot
+         fig = plt.figure(figsize=(7, 5))
+         sns.histplot(data, kde=True, bins=20, edgecolor='black')
+
+         # Title and labels
+         plt.xlabel(x_label)
+         st.title(title)
+
+         st.pyplot(fig)
+
+     def plot_countplot_with_numbers(data, x, hue, title, palette, figsize=(7, 5)):
+         # Create a count plot
+         fig = plt.figure(figsize=figsize)
+         g = sns.countplot(x=x, hue=hue, data=data, palette=palette)
+
+         # Rotate x labels and move the legend outside of the plot
+         g.set_xticklabels(g.get_xticklabels(), rotation=45, ha="right")
+         sns.move_legend(g, "upper left", bbox_to_anchor=(1, 1))
+
+         # Annotate each bar with its count
+         for p in g.patches:
+             height = p.get_height()
+             g.annotate(f'{height}', (p.get_x() + p.get_width() / 2., height), ha='center', va='bottom', fontsize=10)
+
+         # Title and labels
+         st.title(title)
+
+         st.pyplot(fig)
+
+     # Age distribution
+     plot_hist(data=dup['age'], title='Age distribution', x_label='age')
+
+     # Time spent
+     plot_hist(data=dup['avg_time_spent'], title='Time Spent', x_label='avg_time_spent')
+
+     # Pie chart of customer regions
+     st.write('### Customer Region Distribution')
+     reg = dup.region_category.value_counts()
+     def make_autopct(values):
+         def my_autopct(pct):
+             total = sum(values)
+             val = int(round(pct * total / 100.0))
+             return '{p:.2f}% ({v:d})'.format(p=pct, v=val)
+         return my_autopct
+     # Define a custom color palette
+     colors = plt.cm.tab20c.colors
+     fig = plt.figure(figsize=(5, 5))
+     reg.plot.pie(autopct=make_autopct(reg), startangle=90, colors=colors)
+     plt.title('Customer Region')
+     plt.axis('equal')
+     st.pyplot(fig)
+
+     # Membership based on region
+     plot_countplot_with_numbers(x='membership_category', hue='region_category', title='Membership based on Region', data=dup, palette='flare', figsize=(7, 5))
+
+     # Pie chart of churn risk
+     # Count churn
+     ch = dup.churn_risk_score.value_counts()
+
+     # Define a custom color palette
+     colors = plt.cm.Set3.colors
+
+     # Plot the data
+     fig = plt.figure(figsize=(5, 5))
+     ch.plot.pie(autopct=make_autopct(ch), startangle=90, colors=colors)
+     plt.title('Churn Risk')
+     plt.axis('equal')
+     st.pyplot(fig)
+
+     # Churn risk based on gender
+     plot_countplot_with_numbers(data=dup, x='gender', hue='churn_risk_score', title='Churn Risk based on gender', palette='crest', figsize=(7, 5))
+
+     # Churn risk based on membership
+     plot_countplot_with_numbers(data=dup, x='membership_category', hue='churn_risk_score', title='Churn Risk based on Membership', palette='flare', figsize=(7, 5))
+
+
+ if __name__ == '__main__':
+     run()
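A possible refinement, not part of this commit: eda.py re-downloads churn.csv from GitHub on every Streamlit rerun. A minimal sketch of caching the read with st.cache_data (the helper name load_churn_data is hypothetical; assumes Streamlit ≥ 1.18):

    import streamlit as st
    import pandas as pd

    @st.cache_data
    def load_churn_data(url='https://raw.githubusercontent.com/FerdiErs/SQL/main/churn.csv'):
        # Download the dataset once and reuse the cached DataFrame across reruns
        return pd.read_csv(url)

    dup = load_churn_data()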
final_pipeline.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2740fd147ce1ac61072c2871295276dc28d9dc3e7b6ffaf4e6db58ca2fa6f6a1
+ size 7024
model.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:36fcdd283734f3d393b0356cf7b27129df968ea59bcfad9a6fc40197ca70ae65
+ size 157800
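Both binary artifacts are stored as Git LFS pointers; the training code that produced them is not part of this commit. A minimal sketch of how such files are typically written, using hypothetical stand-ins for the real fitted pipeline and trained ANN:

    import joblib
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler
    from tensorflow import keras

    # Hypothetical placeholders -- the actual objects come from the training notebook, which is not in this repo.
    final_pipeline = Pipeline([('scaler', StandardScaler())])
    model = keras.Sequential([keras.layers.Dense(1, activation='sigmoid', input_shape=(12,))])

    # Persist them in the formats prediction.py expects to load
    joblib.dump(final_pipeline, 'final_pipeline.pkl')   # -> final_pipeline.pkl
    model.save('model.h5')                              # -> model.h5 (HDF5)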
music.jpg ADDED
prediction.py ADDED
@@ -0,0 +1,71 @@
+ import streamlit as st
+ import pandas as pd
+ import numpy as np
+ import pickle
+ import json
+ import joblib as jb
+ from tensorflow.keras.models import load_model
+ from feature_engine.outliers import Winsorizer
+
+ # Load models
+ final_pipeline = jb.load('final_pipeline.pkl')
+ model_ann = load_model('model.h5')
+
+ # Load data
+ df = pd.read_csv('https://raw.githubusercontent.com/FerdiErs/SQL/main/churn.csv')
+
+ def run():
+
+     st.markdown("<h1 style='text-align: center;'>Churn predictor</h1>", unsafe_allow_html=True)
+
+     # Description
+     st.subheader('Will your customer churn?')
+
+     # Input form
+     with st.form('form_prediction'):
+         Age = st.number_input('Age', min_value=10, max_value=70, step=1)
+         Region = st.selectbox('Region', df['region_category'].unique())
+         Member = st.selectbox('Membership Type', df['membership_category'].unique())
+         offer = st.selectbox('Preferred Offer', sorted(df['preferred_offer_types'].unique()))
+         Internet = st.selectbox('Your Connectivity', sorted(df['internet_option'].unique()))
+         last_login = st.number_input('Days since last login', min_value=0, max_value=365, step=7)
+         time_spent = st.slider('Average time spent', min_value=0, max_value=10000)
+         transaction_value = st.number_input('Average transaction value', min_value=10, max_value=99999999, step=1)
+         login_days = st.number_input('Average login frequency (days)', min_value=0, max_value=99999999)
+         points_in_wallet = st.number_input('Points in wallet', min_value=0, max_value=99999999)
+         past_complaint = st.selectbox('Past complaint', sorted(df['past_complaint'].unique()))
+         feedback = st.selectbox('Feedback', sorted(df['feedback'].unique()))
+
+         submitted = st.form_submit_button('Predict')
+
+     data_inf = {
+         'age': Age,
+         'region_category': Region,
+         'membership_category': Member,
+         'preferred_offer_types': offer,
+         'internet_option': Internet,
+         'days_since_last_login': last_login,
+         'avg_time_spent': time_spent,
+         'avg_transaction_value': transaction_value,
+         'avg_frequency_login_days': login_days,
+         'points_in_wallet': points_in_wallet,
+         'past_complaint': past_complaint,
+         'feedback': feedback
+     }
+
+     data_inf = pd.DataFrame([data_inf])
+     st.dataframe(data_inf)
+
+     if submitted:
+         # Transform data
+         data_inf_transform = final_pipeline.transform(data_inf)
+
+         # Predict with the ANN and threshold the probability at 0.5
+         y_pred_inf = model_ann.predict(data_inf_transform)
+         y_pred_inf = np.where(y_pred_inf >= 0.5, 1, 0)
+         st.write('Churn prediction:', y_pred_inf)
+
+
+ if __name__ == '__main__':
+     run()
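For reference, the same preprocess-then-predict flow can be sanity-checked outside Streamlit. A minimal sketch, assuming the two artifacts above sit in the working directory, that the pipeline expects exactly the twelve columns used in data_inf, and that the model outputs a single sigmoid probability (as the 0.5 threshold suggests):

    import joblib
    import numpy as np
    import pandas as pd
    from tensorflow.keras.models import load_model

    pipeline = joblib.load('final_pipeline.pkl')
    ann = load_model('model.h5')

    # The same twelve feature columns that prediction.py feeds into the pipeline
    cols = ['age', 'region_category', 'membership_category', 'preferred_offer_types',
            'internet_option', 'days_since_last_login', 'avg_time_spent',
            'avg_transaction_value', 'avg_frequency_login_days', 'points_in_wallet',
            'past_complaint', 'feedback']
    sample = pd.read_csv('https://raw.githubusercontent.com/FerdiErs/SQL/main/churn.csv')[cols].head()

    # Preprocess, predict, and threshold at 0.5 (assumed: 1 = predicted to churn)
    probs = ann.predict(pipeline.transform(sample))
    print(np.where(probs >= 0.5, 1, 0).ravel())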
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ streamlit
+ pandas
+ seaborn
+ matplotlib
+ Pillow
+ numpy
+ tensorflow
+ feature_engine
+ scikit-learn==1.2.2