Spaces:

andrewsunanda
/

churn_prediction

Sleeping

App Files Files Community

andrewsunanda committed on Mar 30, 2023

Commit

40db5de

1 Parent(s): 416d54c

Upload Model

Browse files

Files changed (7) hide show

app.py +12 -0
churn.csv +0 -0
eda.py +135 -0
final_pipeline.pkl +3 -0
model.h5 +3 -0
prediction.py +90 -0
requirements.txt +7 -0

app.py ADDED Viewed

	@@ -0,0 +1,12 @@

+import streamlit as st
+import eda
+import prediction
+navigation = st.sidebar.selectbox('Pilih Halaman', ('EDA', 'Predict'))
+if navigation == 'EDA':
+    eda.run()
+else:
+    prediction.run()

churn.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

eda.py ADDED Viewed

	@@ -0,0 +1,135 @@

+import streamlit as st
+import numpy as np
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+import plotly.express as px
+st.set_page_config(page_title='Internet Service Provider Customer Churn Dataset Analysis', layout='wide', initial_sidebar_state='expanded')
+def run():
+    # Buat Title
+    st.title('EDA on Customer Churn')
+    # Buat Deskripsi
+    st.subheader('Written by Franciscus Andrew Sunanda, FTDS-RMT-018')
+    st.markdown('---')
+    st.write('Dataset         : Internet Service Provider Customer Churn')
+    st.write('Objective       : To create a model that can predict whether a customers will churn or not')
+    st.write('Evaluation Metrics will be using Recall Score to minimize the False Negatives predicted by the model')
+    st.markdown('---')
+    st.write('## Dataset')
+    data = pd.read_csv('churn.csv')
+    st.dataframe(data)
+    st.markdown('---')
+    st.write('## Checking Balance / Imbalance')
+    churn = data['churn_risk_score'].value_counts()
+    fig = plt.figure()
+    churn.plot(kind='pie',
+        figsize=(10, 8),
+        autopct='%1.1f%%', # untuk membuat persentase
+        labels=None,
+        )
+    plt.title('Churn/No Churn Numbers in this Dataset')
+    plt.axis('equal')
+    plt.legend(labels=['No Churn', 'Churn'])
+    st.pyplot(fig)
+    st.write('Looks like in this dataset, for classification is already quite balanced between the two possible outcomes')
+    st.markdown('---')
+    st.write('## Complaints')
+    complaint = data.groupby(['churn_risk_score', 'past_complaint']).size().reset_index(name='count')
+    fig = plt.figure(figsize=(10,8))
+    ax = sns.barplot(x='past_complaint', y='count', data=complaint, hue='churn_risk_score')
+    plt.title('Churn Risk based on Past Complaint')
+    for i in ax.containers:
+        ax.bar_label(i,)
+    handles, labels = ax.get_legend_handles_labels()
+    plt.legend(handles=handles, labels=['No Churn', 'Churn'], loc='upper right')
+    st.pyplot(fig)
+    status = data[data['past_complaint'] == 'Yes'].groupby(['churn_risk_score','complaint_status']).size().reset_index(name='count')
+    fig = plt.figure(figsize=(10,8))
+    ax = sns.barplot(x='complaint_status', y='count', data=status, hue='churn_risk_score')
+    plt.title('Churn Risk based on Complaint Status')
+    for i in ax.containers:
+        ax.bar_label(i,)
+    handles, labels = ax.get_legend_handles_labels()
+    plt.legend(handles=handles, labels=['No Churn', 'Churn'], loc='upper right')
+    st.pyplot(fig)
+    st.markdown('---')
+    st.write('## Feedback')
+    feedback = data.groupby(['churn_risk_score', 'feedback']).size().reset_index(name='count')
+    fig = plt.figure(figsize=(10,8))
+    ax = sns.barplot(x='count', y='feedback', data=feedback, hue='churn_risk_score')
+    plt.title('Churn Risk based on Feedback')
+    for i in ax.containers:
+        ax.bar_label(i,)
+    handles, labels = ax.get_legend_handles_labels()
+    plt.legend(handles=handles, labels=['No Churn', 'Churn'], loc='lower right')
+    st.pyplot(fig)
+    st.markdown('---')
+    st.write('## Membership')
+    membership = data.groupby(['churn_risk_score', 'membership_category']).size().reset_index(name='count')
+    fig = plt.figure(figsize=(10,8))
+    ax = sns.barplot(x='count', y='membership_category', data=membership, hue='churn_risk_score')
+    plt.title("Churn Risk based on user's membership")
+    for i in ax.containers:
+        ax.bar_label(i,)
+    handles, labels = ax.get_legend_handles_labels()
+    plt.legend(handles=handles, labels=['No Churn', 'Churn'], loc='lower right')
+    st.pyplot(fig)
+    st.markdown('---')
+    st.write('Internet Service used by Customers')
+    service = data.groupby(['internet_option','churn_risk_score']).size().reset_index(name='count')
+    fig = plt.figure(figsize=(12,8))
+    ax = sns.barplot(x='count', y='internet_option', data=service, hue='churn_risk_score')
+    plt.title("Churn Risk based on user's membership")
+    for i in ax.containers:
+        ax.bar_label(i,)
+    handles, labels = ax.get_legend_handles_labels()
+    plt.legend(handles=handles, labels=['No Churn', 'Churn'], loc='lower right')
+    st.pyplot(fig)
+    service = data.groupby(['internet_option','feedback']).size().reset_index(name='count')
+    fig = plt.figure(figsize=(30,8))
+    ax = sns.barplot(x='internet_option', y='count', data=service, hue='feedback')
+    plt.title("Churn Risk based on user's internet option")
+    for i in ax.containers:
+        ax.bar_label(i,)
+    handles, labels = ax.get_legend_handles_labels()
+    plt.legend(handles=handles,loc='lower right')
+    st.pyplot(fig)
+if __name__ == '__main__':
+    run()

final_pipeline.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f2318547ccbf56024c7a95d40e388c16313c43d09ac82c5e9a36b969a6d5efb3
+size 3226

model.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6e34d57008fb2b5fe6398e0b8cf172ae44ac5ce604c4cc51e381727d2f340c39
+size 75976

prediction.py ADDED Viewed

	@@ -0,0 +1,90 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+import pickle
+from tensorflow.keras.models import load_model
+# Load the Models
+with open('final_pipeline.pkl', 'rb') as file_1:
+  model_pipeline = pickle.load(file_1)
+model_ann = load_model('model.h5')
+def run():
+    st.title('Wine Quality Prediction')
+    with st.form(key='form_heart_failure'):
+        user = st.text_input('User ID', max_chars=20)
+        age = st.number_input('Age', min_value=1, max_value=100, value=25,step=1)
+        gender = st.selectbox('Are you a male or female?', ('Male','Female'))
+        region = st.selectbox('In which region do you live?', ('City','Town', 'Village'))
+        member = st.selectbox('Your level of membership?', ('No Membership', 'Basic Membership', 'Silver Membership', 'Gold Membership', 'Premium Membership', 'Platinum Membership'))
+        date = st.text_input('Join date', max_chars=10, help='Please enter with yyyy-mm-dd format')
+        referral = st.selectbox('Did you join using referral codes?', ('Yes','No'))
+        offer = st.selectbox('What is your preferred offer types?', ('Gift Vouchers/Coupons', 'Credit/Debit Card Offers', 'Without Offers'))
+        medium = st.selectbox('Which device are you using?', ('Desktop', 'Smartphone', 'Both'))
+        option = st.selectbox('Which product are you using?', ('Wi-Fi', 'Fiber_Optic', 'Mobile_Data'))
+        time = st.text_input('Time during last visit to website', max_chars=8, help='Please enter with hh:mm:ss format')
+        days = st.number_input('Days since last login', min_value=0, max_value=365, value=10, step=1)
+        tspent = st.number_input('Average Time spent on website', min_value=0., max_value=600., value=30., step=.1)
+        value = st.number_input('Average Transaction Value', min_value=500., max_value=100000., value=15000., step=.1)
+        freq = st.number_input('Login Days Frequency', min_value=0, max_value=90, value=10, step=1)
+        point = st.number_input('Pints received', min_value=0., max_value=2500., value=600., step=.1)
+        discount = st.selectbox('Did you receive special discount?', ('Yes', 'No'))
+        preference = st.selectbox('Do you prefer to receive offers?', ('Yes', 'No'))
+        past = st.selectbox('Have you ever submitted a complaint?', ('Yes', 'No'))
+        status = st.selectbox('What is the outcome of the comlaints?', ('No Information Available', 'Not Applicable', 'Unsolved', 'Solved', 'Solved in Follow-up'), help='Choose Not Applicable if you have never submitted a complaint')
+        feedback = st.selectbox('Your feedback for us?', ('Poor Website', 'Poor Customer Service', 'Too many ads', 'Poor Product Quality', 'No reason specified', 'Products always in Stock', 'Reasonable Price', 'Quality Customer Care', 'User Friendly Website'))
+        submitted = st.form_submit_button('Predict')
+    data_inf = {
+    'user_id': user,
+    'age': age,
+    'gender': gender,
+    'region_category': region,
+    'membership_category': member,
+    'joining_date': date,
+    'joined_through_referral': referral,
+    'preferred_offer_types': offer,
+    'medium_of_operation': medium,
+    'internet_option': option,
+    'last_visit_time': time,
+    'days_since_last_login' : days,
+    'avg_time_spent' : tspent,
+    'avg_transaction_value' : value,
+    'avg_frequency_login_days' : freq,
+    'points_in_wallet' : point,
+    'used_special_discount' : discount,
+    'offer_application_preference' : preference,
+    'past_complaint' : past,
+    'complaint_status' : status,
+    'feedback' : feedback
+    }
+    data_inf = pd.DataFrame([data_inf])
+    st.dataframe(data_inf)
+    data_inf['gender'] = data_inf['gender'].replace({'Male': 'M', 'Female': 'F'})
+    if submitted:
+        # Transform Inference-Set
+        data_inf_transform = model_pipeline.transform(data_inf)
+        # Predict using Neural Network
+        y_pred_inf = model_ann.predict(data_inf_transform)
+        y_pred_inf = np.where(y_pred_inf >= 0.5, 1, 0)
+        y_pred_inf = np.where(y_pred_inf == 0, 'No Churn', 'Churn')
+        st.write('Hasil prediksi Model : ', y_pred_inf)
+if __name__ == '__main__':
+    run()

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+streamlit
+pandas
+seaborn
+matplotlib
+numpy
+plotly
+scikit-learn==1.2.1
+tensorflow==2.11.0