Spaces:

FerdiErs
/

Churn_Prediction

Runtime error

App Files Files Community

FerdiErs committed on Jul 27, 2023

Commit

68d3508

1 Parent(s): ac046aa

commit first try

Browse files

Files changed (7) hide show

app.py +10 -0
eda.py +125 -0
final_pipeline.pkl +3 -0
model.h5 +3 -0
music.jpg +0 -0
prediction.py +71 -0
requirements.txt +9 -0

app.py ADDED Viewed

	@@ -0,0 +1,10 @@

+import streamlit as st
+import eda
+import prediction
+navigation = st.sidebar.selectbox('Choose Page : ', ('Description','Churn Estimator'))
+if navigation == 'Description':
+    eda.run()
+else:
+    prediction.run()

eda.py ADDED Viewed

	@@ -0,0 +1,125 @@

+import streamlit as st
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+import plotly.express as px
+from PIL import Image
+# Configure the Streamlit page with the app's title. This is a module-level
+# call, so it executes as soon as this module is imported (app.py imports it
+# before issuing any other Streamlit command).
+st.set_page_config(
+    page_title = 'Customer Churn Predictor'
+)
+def run():
+    # Membuat Title
+    st.title('Customer Churn Predictor')
+    #Sub header
+    st.subheader('Description for Customer Churn Predicto')
+    # Insert Gambar
+    image = Image.open('music.jpg')
+    st.image(image, caption ='Dengar')
+    #description
+    st.write('The goals of this churn estimator')
+    st.write('Dengar adalah sebuah platform streaming music yang ingin memprediksi customer yang akan churn sehingga meminta bantuan data scientist untuk membuat model dalam melakukan prediksi.')
+    st.write('Dengan model ini diharapkan dengar dapat memprediksi customer churn atau tidak sehingga bisa lebih berfokus pada tujuannya.')
+    st.markdown('---')
+    st.write('This page is created to show the visualization of the dataset')
+    st.markdown('---')
+    #show dataframe
+    st.write('Dataset')
+    dup = pd.read_csv('https://raw.githubusercontent.com/FerdiErs/SQL/main/churn.csv')
+    st.dataframe(dup)
+    #visualization Function
+    def plot_hist(data, title, x_label):
+        #create hist plot
+        fig = plt.figure(figsize=(7, 5))
+        sns.histplot(data, kde=True, bins=20, edgecolor='black')
+        #Title and Labels
+        st.title(title)
+        st.pyplot(fig)
+    def plot_countplot_with_numbers(data, x, hue, title, palette, figsize=(7, 5)):
+        # Create CountPlot
+        fig = plt.figure(figsize=figsize)
+        g = sns.countplot(x=x, hue=hue, data=data, palette=palette)
+        # Rotate x labels and move legend outside of the plot
+        g.set_xticklabels(g.get_xticklabels(), rotation=45, ha="right")
+        sns.move_legend(g, "upper left", bbox_to_anchor=(1, 1))
+        # Number in visualization
+        for p in g.patches:
+            height = p.get_height()
+            g.annotate(f'{height}', (p.get_x() + p.get_width() / 2., height), ha='center', va='bottom', fontsize=10)
+        # Title and labels
+        st.title(title)
+        st.pyplot(fig)
+    #Age Distribution
+    plot_hist(data=dup['age'], title='Age distribution', x_label='age')
+    #Time Spent
+    plot_hist(data=dup['avg_time_spent'], title='Time Spent', x_label='avg_time_spent')
+    #pie chart customer region
+    st.write('### Customer Region Distribution')
+    reg = dup.region_category.value_counts()
+    def make_autopct(values):
+        def my_autopct(pct):
+            total = sum(values)
+            val = int(round(pct*total/100.0))
+            return '{p:.2f}%  ({v:d})'.format(p=pct,v=val)
+        return my_autopct
+    # Define a custom color palette
+    colors = plt.cm.tab20c.colors
+    fig = plt.figure(figsize=(5,5))
+    reg.plot.pie(autopct=make_autopct(reg), startangle=90, colors=colors)
+    plt.title('Customer Region')
+    plt.axis('equal')
+    st.pyplot(fig)
+     #Memberhsip based on Region
+    plot_countplot_with_numbers(x='membership_category',hue='region_category', title='Memberhsip based on Region', data=dup, palette='flare', figsize=(7, 5))
+    #membuat pie chart churn risk
+    #count churn
+    ch = dup.churn_risk_score.value_counts()
+    # Define a custom color palette
+    colors = plt.cm.Set3.colors
+    # plot the data
+    fig = plt.figure(figsize=(5,5))
+    ch.plot.pie(autopct=make_autopct(ch), startangle=90, colors=colors)
+    plt.title('Churn Risk')
+    plt.axis('equal')
+    st.pyplot(fig)
+    #churn risk based on gender
+    plot_countplot_with_numbers(data=dup, x='gender', hue='churn_risk_score', title='Churn Risk based on gender', palette='crest', figsize=(7, 5))
+    #churn risk based on membership
+    plot_countplot_with_numbers(data=dup, x='membership_category', hue='churn_risk_score', title='Churn Risk based on Membership', palette='flare', figsize=(7, 5))
+if __name__== '__main__':
+    run()

final_pipeline.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2740fd147ce1ac61072c2871295276dc28d9dc3e7b6ffaf4e6db58ca2fa6f6a1
+size 7024

model.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:36fcdd283734f3d393b0356cf7b27129df968ea59bcfad9a6fc40197ca70ae65
+size 157800

music.jpg ADDED Viewed

prediction.py ADDED Viewed

	@@ -0,0 +1,71 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+import pickle
+import json
+import joblib as jb
+from tensorflow.keras.models import load_model
+from feature_engine.outliers import Winsorizer
+#load models
+# These statements run at import time, so importing this module (as app.py
+# does) already reads both files from paths relative to the working directory.
+# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
+# code - only ship a final_pipeline.pkl that comes from a trusted source.
+final_pipeline = jb.load('final_pipeline.pkl')
+model_ann = load_model('model.h5')
+#load data
+# The CSV is fetched over HTTP on import; run() reads its columns to fill the
+# select-box options of the prediction form.
+df = pd.read_csv('https://raw.githubusercontent.com/FerdiErs/SQL/main/churn.csv')
+def run():
+    st.markdown("<h1 style='text-align: center;'>Churn predictor</h1>", unsafe_allow_html=True)
+    # description
+    st.subheader('Will youre customer churn?.')
+    with st.form('key=form_prediction') :
+        Age = st.number_input('AGE',min_value=10,max_value=70,step=1)
+        Region = st.selectbox('Region', df['region_category'].unique())
+        Member = st.selectbox('Membership Type', df['membership_category'].unique())
+        offer = st.selectbox('Preferred Offer', sorted(df['preferred_offer_types'].unique()))
+        Internet = st.selectbox('Your Connectivity', sorted(df['internet_option'].unique()))
+        last_login = st.number_input('last login',min_value=0,max_value=365,step=7)
+        time_spent = st.slider('TimeSpent',min_value=0,max_value=10000)
+        transaction_value = st.number_input('Money spent',min_value=10,max_value=99999999,step=1)
+        login_days = st.number_input('login streak',min_value=0,max_value=99999999)
+        points_in_wallet= st.number_input('wallet money',min_value=0,max_value=99999999)
+        past_complaint= st.selectbox('complaint', sorted(df['past_complaint'].unique()))
+        feedback = st.selectbox('feedback', sorted(df['feedback'].unique()))
+        submitted = st.form_submit_button('Predict')
+    data_inf = {
+        'age': Age,
+        'region_category': Region,
+        'membership_category': Member,
+        'preferred_offer_types': offer,
+        'internet_option': Internet,
+        'days_since_last_login': last_login,
+        'avg_time_spent': time_spent,
+        'avg_transaction_value': transaction_value,
+        'avg_frequency_login_days': login_days,
+        'points_in_wallet': points_in_wallet,
+        'past_complaint': past_complaint,
+        'feedback':feedback
+    }
+    data_inf = pd.DataFrame([data_inf])
+    st.dataframe(data_inf)
+    if submitted:
+        # transfrom data
+        data_inf_transform = final_pipeline.transform(data_inf)
+        # Predict using bagging
+        y_pred_inf = model_ann.predict(data_inf_transform)
+        y_pred_inf = np.where(y_pred_inf >= 0.5, 1, 0)
+        y_pred_inf
+if __name__=='__main__':
+    run()

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+streamlit
+pandas
+seaborn
+matplotlib
+# plotly.express is imported by eda.py; without this entry the import fails
+plotly
+Pillow
+numpy
+# joblib is imported directly by prediction.py, so it is listed explicitly
+joblib
+tensorflow
+feature_engine
+scikit-learn==1.2.2