Spaces:
Runtime error
Runtime error
commit first try
Browse files- app.py +10 -0
- eda.py +125 -0
- final_pipeline.pkl +3 -0
- model.h5 +3 -0
- music.jpg +0 -0
- prediction.py +71 -0
- requirements.txt +9 -0
app.py
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import eda
|
3 |
+
import prediction
|
4 |
+
|
5 |
+
# Sidebar page selector: 'Description' shows the EDA page, any other choice
# shows the churn estimator.
navigation = st.sidebar.selectbox('Choose Page : ', ('Description','Churn Estimator'))

# Pick the page renderer first, then invoke it once.
render_page = eda.run if navigation == 'Description' else prediction.run
render_page()
|
eda.py
ADDED
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import seaborn as sns
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import plotly.express as px
|
6 |
+
from PIL import Image
|
7 |
+
|
8 |
+
# Set the page title; this runs when the module is imported.
st.set_page_config(page_title='Customer Churn Predictor')
|
11 |
+
|
12 |
+
def run():
    """Render the "Description" page.

    Writes the title, banner image and project description, downloads the
    churn dataset, shows it as a table, and then draws the exploratory
    charts (two histograms, two pie charts and three count plots).

    Every matplotlib figure is closed right after it has been handed to
    ``st.pyplot`` so that figures do not accumulate across Streamlit reruns.
    """
    # Page title
    st.title('Customer Churn Predictor')

    # Sub header
    st.subheader('Description for Customer Churn Predicto')

    # Banner image (path is relative to the current working directory)
    image = Image.open('music.jpg')
    st.image(image, caption ='Dengar')

    # Project description
    st.write('The goals of this churn estimator')
    st.write('Dengar adalah sebuah platform streaming music yang ingin memprediksi customer yang akan churn sehingga meminta bantuan data scientist untuk membuat model dalam melakukan prediksi.')
    st.write('Dengan model ini diharapkan dengar dapat memprediksi customer churn atau tidak sehingga bisa lebih berfokus pada tujuannya.')
    st.markdown('---')

    st.write('This page is created to show the visualization of the dataset')

    st.markdown('---')

    # Show the raw dataframe
    st.write('Dataset')
    dup = pd.read_csv('https://raw.githubusercontent.com/FerdiErs/SQL/main/churn.csv')
    st.dataframe(dup)

    # ---- Visualization helpers -------------------------------------------

    def plot_hist(data, title, x_label):
        """Draw a 20-bin histogram with a KDE curve for ``data``.

        ``title`` is written as a Streamlit title above the chart and
        ``x_label`` is used as the x-axis label.
        """
        fig = plt.figure(figsize=(7, 5))
        ax = sns.histplot(data, kde=True, bins=20, edgecolor='black')

        # Honour the caller-supplied axis label (it was previously ignored).
        ax.set_xlabel(x_label)

        st.title(title)
        st.pyplot(fig)
        # Release the figure; otherwise one more figure stays open per rerun.
        plt.close(fig)

    def plot_countplot_with_numbers(data, x, hue, title, palette, figsize=(7, 5)):
        """Draw a count plot of ``x`` split by ``hue`` with the count on each bar."""
        fig = plt.figure(figsize=figsize)
        g = sns.countplot(x=x, hue=hue, data=data, palette=palette)

        # Rotate x labels and move the legend outside of the plot
        g.set_xticklabels(g.get_xticklabels(), rotation=45, ha="right")
        sns.move_legend(g, "upper left", bbox_to_anchor=(1, 1))

        # Write the count above every bar
        for p in g.patches:
            height = p.get_height()
            # Skip patches without a positive height (empty or NaN bars) so
            # that no stray "0"/"nan" label is drawn.
            if not height > 0:
                continue
            # Counts are whole numbers; print them without a decimal part.
            g.annotate(f'{height:.0f}', (p.get_x() + p.get_width() / 2., height), ha='center', va='bottom', fontsize=10)

        st.title(title)
        st.pyplot(fig)
        plt.close(fig)

    def make_autopct(values):
        """Return a pie ``autopct`` callback that prints "<pct>% (<count>)"."""
        # The total does not change between wedges, so compute it once.
        total = sum(values)

        def my_autopct(pct):
            val = int(round(pct*total/100.0))
            return '{p:.2f}% ({v:d})'.format(p=pct,v=val)
        return my_autopct

    # ---- Charts ----------------------------------------------------------

    # Age distribution
    plot_hist(data=dup['age'], title='Age distribution', x_label='age')

    # Time spent
    plot_hist(data=dup['avg_time_spent'], title='Time Spent', x_label='avg_time_spent')

    # Pie chart: customer region
    st.write('### Customer Region Distribution')
    reg = dup.region_category.value_counts()

    # Custom color palette
    colors = plt.cm.tab20c.colors
    fig = plt.figure(figsize=(5,5))
    reg.plot.pie(autopct=make_autopct(reg), startangle=90, colors=colors)
    plt.title('Customer Region')
    plt.axis('equal')
    st.pyplot(fig)
    plt.close(fig)

    # Membership based on region
    plot_countplot_with_numbers(x='membership_category',hue='region_category', title='Memberhsip based on Region', data=dup, palette='flare', figsize=(7, 5))

    # Pie chart: churn risk (count rows per churn score)
    ch = dup.churn_risk_score.value_counts()

    # Custom color palette
    colors = plt.cm.Set3.colors

    # Plot the data
    fig = plt.figure(figsize=(5,5))
    ch.plot.pie(autopct=make_autopct(ch), startangle=90, colors=colors)
    plt.title('Churn Risk')
    plt.axis('equal')
    st.pyplot(fig)
    plt.close(fig)

    # Churn risk based on gender
    plot_countplot_with_numbers(data=dup, x='gender', hue='churn_risk_score', title='Churn Risk based on gender', palette='crest', figsize=(7, 5))

    # Churn risk based on membership
    plot_countplot_with_numbers(data=dup, x='membership_category', hue='churn_risk_score', title='Churn Risk based on Membership', palette='flare', figsize=(7, 5))
|
120 |
+
|
121 |
+
|
122 |
+
|
123 |
+
|
124 |
+
# Render the page when this file is executed as the main script.
if __name__ == '__main__':
    run()
|
final_pipeline.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2740fd147ce1ac61072c2871295276dc28d9dc3e7b6ffaf4e6db58ca2fa6f6a1
|
3 |
+
size 7024
|
model.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:36fcdd283734f3d393b0356cf7b27129df968ea59bcfad9a6fc40197ca70ae65
|
3 |
+
size 157800
|
music.jpg
ADDED
prediction.py
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import pickle
|
5 |
+
import json
|
6 |
+
import joblib as jb
|
7 |
+
from tensorflow.keras.models import load_model
|
8 |
+
from feature_engine.outliers import Winsorizer
|
9 |
+
|
10 |
+
#load models
|
11 |
+
# Locations of the serialized artefacts and of the reference dataset.
_PIPELINE_PATH = 'final_pipeline.pkl'
_MODEL_PATH = 'model.h5'
_DATASET_URL = 'https://raw.githubusercontent.com/FerdiErs/SQL/main/churn.csv'

# Load the preprocessing pipeline and the Keras model once, at import time.
final_pipeline = jb.load(_PIPELINE_PATH)
model_ann = load_model(_MODEL_PATH)

# Dataset used to populate the choices offered by the input form.
df = pd.read_csv(_DATASET_URL)
|
16 |
+
|
17 |
+
def run():
    """Render the "Churn Estimator" page.

    Builds an input form from the reference dataset ``df``, echoes the
    entered values as a one-row dataframe and, once the form is submitted,
    transforms that row with ``final_pipeline``, feeds it to ``model_ann``
    and writes the thresholded (>= 0.5 -> 1, else 0) prediction to the page.
    """
    st.markdown("<h1 style='text-align: center;'>Churn predictor</h1>", unsafe_allow_html=True)

    # Description
    st.subheader('Will youre customer churn?.')

    def choices(column, ordered=False):
        """Return the distinct non-null values of ``df[column]``.

        Missing values are dropped first: a NaN mixed with strings makes
        ``sorted`` raise ``TypeError`` and is not a meaningful option.
        """
        values = df[column].dropna().unique()
        return sorted(values) if ordered else values

    with st.form(key='form_prediction'):
        Age = st.number_input('AGE',min_value=10,max_value=70,step=1)
        Region = st.selectbox('Region', choices('region_category'))
        Member = st.selectbox('Membership Type', choices('membership_category'))
        offer = st.selectbox('Preferred Offer', choices('preferred_offer_types', ordered=True))
        Internet = st.selectbox('Your Connectivity', choices('internet_option', ordered=True))
        last_login = st.number_input('last login',min_value=0,max_value=365,step=7)
        time_spent = st.slider('TimeSpent',min_value=0,max_value=10000)
        transaction_value = st.number_input('Money spent',min_value=10,max_value=99999999,step=1)
        login_days = st.number_input('login streak',min_value=0,max_value=99999999)
        points_in_wallet= st.number_input('wallet money',min_value=0,max_value=99999999)
        past_complaint= st.selectbox('complaint', choices('past_complaint', ordered=True))
        feedback = st.selectbox('feedback', choices('feedback', ordered=True))

        submitted = st.form_submit_button('Predict')

    # One-row frame keyed by the dataset's column names.
    data_inf = {
        'age': Age,
        'region_category': Region,
        'membership_category': Member,
        'preferred_offer_types': offer,
        'internet_option': Internet,
        'days_since_last_login': last_login,
        'avg_time_spent': time_spent,
        'avg_transaction_value': transaction_value,
        'avg_frequency_login_days': login_days,
        'points_in_wallet': points_in_wallet,
        'past_complaint': past_complaint,
        'feedback':feedback
    }

    data_inf = pd.DataFrame([data_inf])
    st.dataframe(data_inf)

    if submitted:
        # Transform the input row with the fitted preprocessing pipeline
        data_inf_transform = final_pipeline.transform(data_inf)

        # Predict with the loaded Keras model and threshold at 0.5
        y_pred_inf = model_ann.predict(data_inf_transform)
        y_pred_inf = np.where(y_pred_inf >= 0.5, 1, 0)

        # Write the result explicitly: a bare expression is only rendered by
        # Streamlit "magic" in the main script, not in an imported module.
        st.write(y_pred_inf)
|
68 |
+
|
69 |
+
|
70 |
+
# Render the page when this file is executed as the main script.
if __name__ == '__main__':
    run()
|
requirements.txt
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
pandas
|
3 |
+
seaborn
|
4 |
+
matplotlib
|
5 |
+
Pillow
|
6 |
+
numpy
|
7 |
+
tensorflow
|
8 |
+
feature_engine
|
9 |
+
scikit-learn==1.2.2
plotly
|