import streamlit as st
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

# Page configuration is applied at import time, before any other Streamlit
# call made by this module.
st.set_page_config(
    page_title='Internet Service Provider Customer Churn Dataset Analysis',
    layout='wide',
    initial_sidebar_state='expanded',
)

# Legend text for the two `churn_risk_score` classes, in ascending class
# order. NOTE(review): assumes the lower score means "no churn" -- confirm
# against the dataset description.
_CHURN_LEGEND = ['No Churn', 'Churn']


def _render(fig) -> None:
    """Send *fig* to the Streamlit page, then release it.

    Figures created through pyplot stay registered until they are closed, so
    without the explicit close every rerun of the script would keep six more
    figures alive.
    """
    st.pyplot(fig)
    plt.close(fig)


def _count_barplot(data, group_cols, x, y, hue, title, figsize,
                   legend_loc, legend_labels=None) -> None:
    """Draw a grouped bar chart of row counts and render it in Streamlit.

    Args:
        data: DataFrame to aggregate.
        group_cols: Columns passed to ``groupby``; the number of rows in each
            group is stored in a new ``count`` column.
        x: Column (or ``'count'``) plotted on the x axis.
        y: Column (or ``'count'``) plotted on the y axis.
        hue: Column that splits each bar group by colour.
        title: Axes title.
        figsize: ``(width, height)`` of the figure in inches.
        legend_loc: Matplotlib legend location string.
        legend_labels: Optional replacement legend labels; when omitted the
            labels attached to the plotted handles are used.
    """
    counts = data.groupby(group_cols).size().reset_index(name='count')

    # Explicit figure/axes instead of pyplot's implicit "current figure".
    fig, ax = plt.subplots(figsize=figsize)
    sns.barplot(x=x, y=y, data=counts, hue=hue, ax=ax)
    ax.set_title(title)

    # Annotate every bar with its count.
    for container in ax.containers:
        ax.bar_label(container)

    handles, _ = ax.get_legend_handles_labels()
    if legend_labels is None:
        ax.legend(handles=handles, loc=legend_loc)
    else:
        ax.legend(handles=handles, labels=legend_labels, loc=legend_loc)

    _render(fig)


def run() -> None:
    """Render the customer-churn EDA page.

    Reads ``churn.csv`` from the working directory, shows the raw table and
    then a series of charts: class balance, complaints, feedback, membership
    and internet option.
    """
    # Create title
    st.title('EDA on Customer Churn')

    # Create description
    st.subheader('Written by Franciscus Andrew Sunanda, FTDS-RMT-018')
    st.markdown('---')
    st.write('Dataset : Internet Service Provider Customer Churn')
    st.write('Objective : To create a model that can predict whether a customers will churn or not')
    st.write('Evaluation Metrics will be using Recall Score to minimize the False Negatives predicted by the model')
    st.markdown('---')

    st.write('## Dataset')
    data = pd.read_csv('churn.csv')
    st.dataframe(data)
    st.markdown('---')

    st.write('## Checking Balance / Imbalance')
    # value_counts() orders by frequency; sort by class value so the wedges
    # line up with the fixed legend labels regardless of which class is
    # more common.
    churn = data['churn_risk_score'].value_counts().sort_index()
    fig, ax = plt.subplots(figsize=(10, 8))
    churn.plot(
        kind='pie',
        ax=ax,
        autopct='%1.1f%%',  # show each wedge's percentage
        labels=None,
    )
    ax.set_title('Churn/No Churn Numbers in this Dataset')
    ax.axis('equal')
    ax.legend(labels=_CHURN_LEGEND)
    _render(fig)
    st.write('Looks like in this dataset, for classification is already quite balanced between the two possible outcomes')
    st.markdown('---')

    st.write('## Complaints')
    _count_barplot(
        data,
        group_cols=['churn_risk_score', 'past_complaint'],
        x='past_complaint',
        y='count',
        hue='churn_risk_score',
        title='Churn Risk based on Past Complaint',
        figsize=(10, 8),
        legend_loc='upper right',
        legend_labels=_CHURN_LEGEND,
    )
    # Complaint status is only looked at for customers who did complain.
    _count_barplot(
        data[data['past_complaint'] == 'Yes'],
        group_cols=['churn_risk_score', 'complaint_status'],
        x='complaint_status',
        y='count',
        hue='churn_risk_score',
        title='Churn Risk based on Complaint Status',
        figsize=(10, 8),
        legend_loc='upper right',
        legend_labels=_CHURN_LEGEND,
    )
    st.markdown('---')

    st.write('## Feedback')
    _count_barplot(
        data,
        group_cols=['churn_risk_score', 'feedback'],
        x='count',
        y='feedback',
        hue='churn_risk_score',
        title='Churn Risk based on Feedback',
        figsize=(10, 8),
        legend_loc='lower right',
        legend_labels=_CHURN_LEGEND,
    )
    st.markdown('---')

    st.write('## Membership')
    _count_barplot(
        data,
        group_cols=['churn_risk_score', 'membership_category'],
        x='count',
        y='membership_category',
        hue='churn_risk_score',
        title="Churn Risk based on user's membership",
        figsize=(10, 8),
        legend_loc='lower right',
        legend_labels=_CHURN_LEGEND,
    )
    st.markdown('---')

    st.write('Internet Service used by Customers')
    _count_barplot(
        data,
        group_cols=['internet_option', 'churn_risk_score'],
        x='count',
        y='internet_option',
        hue='churn_risk_score',
        # Was a copy of the membership chart's title.
        title="Churn Risk based on user's internet option",
        figsize=(12, 8),
        legend_loc='lower right',
        legend_labels=_CHURN_LEGEND,
    )
    # This chart splits by feedback, not churn, so it keeps the legend labels
    # seaborn derives from the feedback values.
    _count_barplot(
        data,
        group_cols=['internet_option', 'feedback'],
        x='internet_option',
        y='count',
        hue='feedback',
        title="Feedback based on user's internet option",
        figsize=(30, 8),
        legend_loc='lower right',
    )


if __name__ == '__main__':
    run()