Spaces:

andrewsunanda
/

churn_prediction

Sleeping

File size: 4,681 Bytes

40db5de

import streamlit as st
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

# Page-wide settings: browser-tab title, wide layout, sidebar expanded on load.
# Kept ahead of every other st.* call in this script.
st.set_page_config(
    page_title='Internet Service Provider Customer Churn Dataset Analysis',
    layout='wide',
    initial_sidebar_state='expanded',
)

def _render_count_bars(frame, group_cols, category, hue, title, *,
                       figsize=(10, 8), horizontal=False,
                       legend_labels=None, legend_loc='upper right'):
    """Draw a grouped count bar chart and render it in the Streamlit page.

    Args:
        frame: DataFrame holding the raw rows to count.
        group_cols: Column names passed to ``groupby``; their order fixes the
            row order of the aggregated table and so the category order drawn.
        category: Column placed on the categorical axis.
        hue: Column used to split every category into coloured bars.
        title: Title drawn above the chart.
        figsize: Matplotlib figure size in inches.
        horizontal: When True the counts run along the x axis (horizontal
            bars); otherwise along the y axis (vertical bars).
        legend_labels: Replacement legend texts; ``None`` keeps the labels
            seaborn derived from the ``hue`` values.
        legend_loc: Matplotlib legend location string.
    """
    counts = frame.groupby(group_cols).size().reset_index(name='count')

    fig = plt.figure(figsize=figsize)
    if horizontal:
        ax = sns.barplot(x='count', y=category, data=counts, hue=hue)
    else:
        ax = sns.barplot(x=category, y='count', data=counts, hue=hue)
    plt.title(title)

    # Annotate every bar with its count.
    for container in ax.containers:
        ax.bar_label(container)

    handles, _ = ax.get_legend_handles_labels()
    if legend_labels is None:
        plt.legend(handles=handles, loc=legend_loc)
    else:
        plt.legend(handles=handles, labels=legend_labels, loc=legend_loc)

    st.pyplot(fig)
    # Streamlit re-executes the script on every interaction; close the figure
    # so open matplotlib figures do not pile up across reruns.
    plt.close(fig)


def run() -> None:
    """Render the exploratory data analysis page for the churn dataset.

    Reads ``churn.csv`` from the working directory, shows the raw table and
    then a series of charts relating ``churn_risk_score`` to complaints,
    feedback, membership category and internet option.
    """
    # Legend texts for churn_risk_score in ascending order (0, then 1).
    churn_labels = ['No Churn', 'Churn']

    # Create the title
    st.title('EDA on Customer Churn')

    # Create the description
    st.subheader('Written by Franciscus Andrew Sunanda, FTDS-RMT-018')

    st.markdown('---')

    st.write('Dataset         : Internet Service Provider Customer Churn')
    st.write('Objective       : To create a model that can predict whether a customers will churn or not')
    st.write('Evaluation Metrics will be using Recall Score to minimize the False Negatives predicted by the model')

    st.markdown('---')

    st.write('## Dataset')
    data = pd.read_csv('churn.csv')
    st.dataframe(data)

    st.markdown('---')

    st.write('## Checking Balance / Imbalance')

    # value_counts() orders by frequency, so the slice order would depend on
    # which class is larger. Sorting by the class value pins the order to
    # 0, 1 so the fixed legend texts always match their slices.
    churn = data['churn_risk_score'].value_counts().sort_index()

    fig = plt.figure()
    churn.plot(
        kind='pie',
        figsize=(10, 8),
        autopct='%1.1f%%',  # print each slice's percentage
        labels=None,
    )
    plt.title('Churn/No Churn Numbers in this Dataset')
    plt.axis('equal')
    plt.legend(labels=churn_labels)
    st.pyplot(fig)
    plt.close(fig)

    st.write('Looks like in this dataset, for classification is already quite balanced between the two possible outcomes')

    st.markdown('---')

    st.write('## Complaints')

    _render_count_bars(
        data, ['churn_risk_score', 'past_complaint'],
        category='past_complaint', hue='churn_risk_score',
        title='Churn Risk based on Past Complaint',
        legend_labels=churn_labels, legend_loc='upper right',
    )

    # Complaint status is only charted for customers who filed a complaint.
    _render_count_bars(
        data[data['past_complaint'] == 'Yes'],
        ['churn_risk_score', 'complaint_status'],
        category='complaint_status', hue='churn_risk_score',
        title='Churn Risk based on Complaint Status',
        legend_labels=churn_labels, legend_loc='upper right',
    )

    st.markdown('---')

    st.write('## Feedback')

    _render_count_bars(
        data, ['churn_risk_score', 'feedback'],
        category='feedback', hue='churn_risk_score',
        title='Churn Risk based on Feedback',
        horizontal=True,
        legend_labels=churn_labels, legend_loc='lower right',
    )

    st.markdown('---')

    st.write('## Membership')

    _render_count_bars(
        data, ['churn_risk_score', 'membership_category'],
        category='membership_category', hue='churn_risk_score',
        title="Churn Risk based on user's membership",
        horizontal=True,
        legend_labels=churn_labels, legend_loc='lower right',
    )

    st.markdown('---')

    # Rendered as a markdown heading like every other section.
    st.write('## Internet Service used by Customers')

    _render_count_bars(
        data, ['internet_option', 'churn_risk_score'],
        category='internet_option', hue='churn_risk_score',
        title="Churn Risk based on user's internet option",
        figsize=(12, 8), horizontal=True,
        legend_labels=churn_labels, legend_loc='lower right',
    )

    # Bars are split by feedback here, so the legend keeps seaborn's own
    # labels and the title names feedback rather than churn risk.
    _render_count_bars(
        data, ['internet_option', 'feedback'],
        category='internet_option', hue='feedback',
        title="Feedback based on user's internet option",
        figsize=(30, 8),
        legend_loc='lower right',
    )

    
# Build the page only when this file is executed as a script, not on import.
if __name__ == '__main__':
    run()