# Spaces: Sleeping
# (Page-header residue captured along with the source; not part of the program.)
import streamlit as st | |
import numpy as np | |
import pandas as pd | |
import seaborn as sns | |
import matplotlib.pyplot as plt | |
import plotly.express as px | |
# Page-level settings: browser-tab title, wide layout, sidebar expanded on load.
st.set_page_config(
    page_title='Internet Service Provider Customer Churn Dataset Analysis',
    layout='wide',
    initial_sidebar_state='expanded',
)
# Legend text for the binary churn_risk_score, listed in ascending score order.
# NOTE(review): assumes 0 = no churn and 1 = churn -- confirm against churn.csv.
_CHURN_LABELS = ['No Churn', 'Churn']


def _show_figure(fig):
    """Render *fig* on the Streamlit page, then close it.

    Streamlit re-executes the script on every interaction, so a figure that
    is left open stays registered with pyplot and accumulates across reruns.
    """
    st.pyplot(fig)
    plt.close(fig)


def _show_count_bars(data, group_cols, *, x, y, hue, title,
                     figsize=(10, 8), legend_labels=None,
                     legend_loc='upper right'):
    """Plot the number of rows per combination of *group_cols* as grouped bars.

    Args:
        data: DataFrame holding every column named in ``group_cols``.
        group_cols: Columns to group by; the row count of each group is
            stored in a ``count`` column of the frame handed to seaborn.
        x: Column (or ``'count'``) placed on the x axis.
        y: Column (or ``'count'``) placed on the y axis.
        hue: Column used to split each position into coloured bars.
        title: Axes title.
        figsize: Figure size in inches, as ``(width, height)``.
        legend_labels: Replacement legend texts, one per hue level in the
            order seaborn draws them. ``None`` keeps seaborn's own labels.
        legend_loc: Matplotlib legend location string.
    """
    counts = data.groupby(group_cols).size().reset_index(name='count')

    fig, ax = plt.subplots(figsize=figsize)
    sns.barplot(x=x, y=y, data=counts, hue=hue, ax=ax)
    ax.set_title(title)

    # Annotate every bar with its count.
    for container in ax.containers:
        ax.bar_label(container)

    handles, _ = ax.get_legend_handles_labels()
    if legend_labels is None:
        ax.legend(handles=handles, loc=legend_loc)
    else:
        ax.legend(handles=handles, labels=legend_labels, loc=legend_loc)

    _show_figure(fig)


def run():
    """Render the customer-churn EDA page.

    Reads ``churn.csv`` from the working directory, shows the raw table and
    then draws, in order: the class-balance pie chart and count bar charts
    for past complaints, complaint status, feedback, membership category
    and internet option.

    Raises:
        FileNotFoundError: If ``churn.csv`` is missing (from ``pd.read_csv``).
        KeyError: If an expected column is absent from the dataset.
    """
    # Create the title
    st.title('EDA on Customer Churn')

    # Create the description
    st.subheader('Written by Franciscus Andrew Sunanda, FTDS-RMT-018')
    st.markdown('---')
    st.write('Dataset : Internet Service Provider Customer Churn')
    st.write('Objective : To create a model that can predict whether a customers will churn or not')
    st.write('Evaluation Metrics will be using Recall Score to minimize the False Negatives predicted by the model')
    st.markdown('---')

    st.write('## Dataset')
    data = pd.read_csv('churn.csv')
    st.dataframe(data)
    st.markdown('---')

    st.write('## Checking Balance / Imbalance')
    # value_counts() orders by frequency, so the wedge order would flip with
    # the majority class; sort by score so the wedges always line up with
    # _CHURN_LABELS, the same ascending order the bar charts below rely on.
    churn = data['churn_risk_score'].value_counts().sort_index()
    fig, ax = plt.subplots(figsize=(10, 8))
    churn.plot(
        kind='pie',
        ax=ax,
        autopct='%1.1f%%',  # print each wedge's share as a percentage
        labels=None,
    )
    ax.set_title('Churn/No Churn Numbers in this Dataset')
    ax.axis('equal')
    ax.legend(labels=_CHURN_LABELS)
    _show_figure(fig)
    st.write('Looks like in this dataset, for classification is already quite balanced between the two possible outcomes')
    st.markdown('---')

    st.write('## Complaints')
    _show_count_bars(
        data,
        ['churn_risk_score', 'past_complaint'],
        x='past_complaint',
        y='count',
        hue='churn_risk_score',
        title='Churn Risk based on Past Complaint',
        legend_labels=_CHURN_LABELS,
        legend_loc='upper right',
    )
    # Complaint status is only examined for customers who did complain.
    _show_count_bars(
        data[data['past_complaint'] == 'Yes'],
        ['churn_risk_score', 'complaint_status'],
        x='complaint_status',
        y='count',
        hue='churn_risk_score',
        title='Churn Risk based on Complaint Status',
        legend_labels=_CHURN_LABELS,
        legend_loc='upper right',
    )
    st.markdown('---')

    st.write('## Feedback')
    _show_count_bars(
        data,
        ['churn_risk_score', 'feedback'],
        x='count',
        y='feedback',
        hue='churn_risk_score',
        title='Churn Risk based on Feedback',
        legend_labels=_CHURN_LABELS,
        legend_loc='lower right',
    )
    st.markdown('---')

    st.write('## Membership')
    _show_count_bars(
        data,
        ['churn_risk_score', 'membership_category'],
        x='count',
        y='membership_category',
        hue='churn_risk_score',
        title="Churn Risk based on user's membership",
        legend_labels=_CHURN_LABELS,
        legend_loc='lower right',
    )
    st.markdown('---')

    # Heading marker added so this section matches the other '## ' sections.
    st.write('## Internet Service used by Customers')
    _show_count_bars(
        data,
        ['internet_option', 'churn_risk_score'],
        x='count',
        y='internet_option',
        hue='churn_risk_score',
        # Was a copy of the membership title; this chart is per internet option.
        title="Churn Risk based on user's internet option",
        figsize=(12, 8),
        legend_labels=_CHURN_LABELS,
        legend_loc='lower right',
    )
    _show_count_bars(
        data,
        ['internet_option', 'feedback'],
        x='internet_option',
        y='count',
        hue='feedback',
        # Was titled "Churn Risk ..." although the bars are split by feedback.
        title="Feedback based on user's internet option",
        figsize=(30, 8),
        legend_loc='lower right',
    )
# Script entry point: build the page only when this file is executed directly.
if __name__ == '__main__':
    run()