Jason
committed on
Commit
·
eef1d5d
1
Parent(s):
a2a3f45
first commit
Browse files- app.py +14 -0
- config.ini +8 -0
- eda.py +54 -0
- helpers/__pycache__/plot.cpython-310.pyc +0 -0
- helpers/__pycache__/query.cpython-310.pyc +0 -0
- helpers/plot.py +47 -0
- helpers/query.py +12 -0
- images/sequential_improved_model.png +0 -0
- images/sequential_improved_prediction.png +0 -0
- predictor.py +48 -0
- requirements.txt +9 -0
app.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
|
3 |
+
import eda
|
4 |
+
import predictor
|
5 |
+
|
6 |
+
# Map each sidebar choice to the function that renders that page.
pages = {
    'EDA': eda.run,
    'Predictor': predictor.run,
}

# The sidebar select box offers the page names in the order defined above.
navigation = st.sidebar.selectbox(
    label='Pilih Halaman',
    options=tuple(pages),
)

# Render the chosen page; an unrecognised selection renders nothing.
render_page = pages.get(navigation)
if render_page is not None:
    render_page()
|
config.ini
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[DEFAULT]
|
2 |
+
production = True
|
3 |
+
|
4 |
+
[DEVELOPMENT]
|
5 |
+
URL = http://127.0.0.1:5000
|
6 |
+
|
7 |
+
[PRODUCTION]
|
8 |
+
URL = https://backend-telco-churn-kidfrom.koyeb.app
|
eda.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from helpers import query, plot
|
3 |
+
from PIL import Image
|
4 |
+
|
5 |
+
# Page-wide Streamlit settings: browser-tab title, wide layout, and an
# initially expanded sidebar.
# NOTE(review): this executes when the module is imported, not inside run();
# confirm Streamlit still applies it on every script rerun.
_PAGE_SETTINGS = dict(
    page_title='Telkomsel - PredictorTelkomsel',
    layout='wide',
    initial_sidebar_state='expanded',
)
st.set_page_config(**_PAGE_SETTINGS)
|
10 |
+
|
11 |
+
def run():
    """Render the exploratory-data-analysis page.

    Writes the page headings, draws the categorical and numerical plots
    from the data returned by ``query.fetch_all_data()``, shows the two
    static model images, and ends with a markdown summary of where the
    model is and is not predictable.
    """
    st.title('Telkomsel Customers Exploratory Data Analysis')
    st.subheader('Exploratory Data Analysis of Telkomsel Customers')

    for notice in (
        "This page is made by Jason Rich Darmawan Onggo Putra",
        "Disclaimer: the data set used is not real.",
    ):
        st.write(notice)

    # One fetch feeds both plots below.
    customers = query.fetch_all_data()

    st.write("## Histogram of categorical features")
    categorical_figure = plot.plot_categorical_features(df=customers)
    st.pyplot(fig=categorical_figure)

    st.write("## Pairplot of numerical features")
    numerical_figure = plot.plot_numerical_features(customers)
    st.pyplot(fig=numerical_figure)

    # Each static image section is: heading, file to open, caption.
    image_sections = (
        (
            "## Model Layers",
            "./images/sequential_improved_model.png",
            'Sequential Improved Model',
        ),
        (
            "## Model Strengths and Weaknesses",
            "./images/sequential_improved_prediction.png",
            'Sequential Improved Model Strengths and Weaknesses',
        ),
    )
    for heading, image_path, caption in image_sections:
        st.write(heading)
        st.image(Image.open(image_path), caption=caption)

    st.markdown(
        """
        We will inform management, to use this model for a specific customer segment which is more predictable, according to the model:
        1. A customer with one year or two year contract.
        2. An old customer / customer with tenure above 40 / customer with total charges above 4000.
        3. A customer without internet service.
        4. A customer with internet service is unpredictable.

        However, a customer with internet service and 1 related internet service will make the customer more predictable.

        e.g A customer with tech support / online security / online backup.

        5. A customer that pays with Bank Transfer (automatic)
        6. A customer with monthly charges below 20.

        We will also inform management, not to use this model for a specific customer segment, which is less predictable according to the model:
        1. A customer that is paying with Electronic check / Mail check / Credit Card (automatic).
        """
    )
|
helpers/__pycache__/plot.cpython-310.pyc
ADDED
Binary file (1.68 kB). View file
|
|
helpers/__pycache__/query.cpython-310.pyc
ADDED
Binary file (529 Bytes). View file
|
|
helpers/plot.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import matplotlib.pyplot as plt
|
2 |
+
import seaborn as sns
|
3 |
+
import pandas as pd
|
4 |
+
|
5 |
+
def __histplot(features, target, df):
    """
    Draw one percentage histogram per feature, split by ``target``.

    Usage: features parameter should be nominal features.

    The plots are laid out on a 4x4 grid of axes; features are paired with
    axes in order, so at most 16 features are drawn. Returns the figure.
    """
    # Features whose x tick labels are tilted.
    tilted_label_features = (
        'MultipleLines', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection',
        'TechSupport', 'StreamingTV', 'StreamingMovies', 'PaymentMethod',
        'Contract',
    )

    figure, grid = plt.subplots(4, 4, figsize=(12, 8), layout='constrained')

    for name, axis in zip(features, grid.ravel()):
        sns.histplot(
            data=df,
            x=name,
            hue=target,
            multiple='dodge',
            stat='percent',
            common_norm=False,
            ax=axis,
        )

        # Only the 'gender' panel keeps its legend; every other one is hidden.
        keeps_legend = name == 'gender'
        if not keeps_legend:
            axis.get_legend().set_visible(False)

        if name in tilted_label_features:
            # Re-apply the current tick positions before relabelling
            # (presumably to avoid matplotlib's fixed-locator warning).
            axis.set_xticks(axis.get_xticks())
            axis.set_xticklabels(
                axis.get_xticklabels(), rotation=15, ha='right'
            )

    return figure
|
26 |
+
|
27 |
+
def plot_categorical_features(df):
    """Return the figure of 'Churn'-split histograms for the categorical columns of ``df``."""
    categorical_columns = [
        'gender', 'SeniorCitizen', 'Partner', 'Dependents',
        'PhoneService', 'MultipleLines', 'InternetService',
        'OnlineSecurity', 'OnlineBackup', 'DeviceProtection',
        'TechSupport', 'StreamingTV', 'StreamingMovies',
        'PaperlessBilling', 'PaymentMethod', 'Contract',
    ]
    return __histplot(features=categorical_columns, target='Churn', df=df)
|
39 |
+
|
40 |
+
def plot_numerical_features(df):
    """Return a seaborn pairplot of the numerical columns, coloured by 'Churn'.

    'TotalCharges' is converted to numbers first; values that cannot be
    parsed become NaN. The caller's ``df`` is left unmodified.
    """
    total_charges = pd.to_numeric(df['TotalCharges'], errors='coerce')
    # assign() returns a new frame, so the input frame is not touched.
    numeric_view = df.assign(TotalCharges=total_charges)

    plotted_columns = ['tenure', 'MonthlyCharges', 'TotalCharges', 'Churn']
    return sns.pairplot(data=numeric_view[plotted_columns], hue='Churn')
|
helpers/query.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
import pandas as pd
|
3 |
+
|
4 |
+
def fetch_all_data() -> pd.DataFrame:
    """Fetch all records from the query service and return them as a DataFrame.

    Returns:
        A DataFrame built from the JSON body of the response.

    Raises:
        requests.HTTPError: the service answered with a status other than 200.
        requests.RequestException: the request could not be completed
            (connection failure, timeout, ...).
    """
    # NOTE(review): the endpoint is hard-coded to a local server here, while
    # predictor.py takes its URL from config.ini; confirm whether this
    # should follow the same configuration.
    URL = "http://127.0.0.1:5000/query"

    # Without a timeout the call can block forever if the service hangs.
    r = requests.get(URL, timeout=30)

    # Raise instead of returning an error tuple: the declared return type is
    # a DataFrame, and a tuple would only fail later inside the plotting code.
    if r.status_code != 200:
        raise requests.HTTPError(
            'Error with status code ' + str(r.status_code), response=r
        )

    return pd.DataFrame(r.json())
|
images/sequential_improved_model.png
ADDED
![]() |
images/sequential_improved_prediction.png
ADDED
![]() |
predictor.py
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import requests
|
3 |
+
import pandas as pd
|
4 |
+
import streamlit_toggle as tog
|
5 |
+
from configparser import ConfigParser
|
6 |
+
|
7 |
+
def str_to_array(items):
    """Split a comma-separated string into a list of cleaned entries.

    Surrounding spaces and double quotes are stripped from every entry.
    Entries that are empty after stripping (empty input, doubled or
    trailing commas) are dropped so no blank ID is passed on.
    """
    cleaned = (item.strip("\" ") for item in items.split(','))
    return [item for item in cleaned if item]
|
8 |
+
|
9 |
+
# Load the deployment settings; the path is relative to the directory the
# app is started from.
config = ConfigParser()
config.read('./config.ini')

# getboolean() accepts the usual INI spellings (True/true/yes/on/1 ...), so a
# differently-cased flag no longer silently selects the development URL.
_section = (
    'PRODUCTION'
    if config.getboolean('DEFAULT', 'production')
    else 'DEVELOPMENT'
)

# Base URL of the prediction backend; run() appends the endpoint path.
URL = config[_section]['URL']
|
16 |
+
|
17 |
+
def run():
    """Render the prediction form and display the backend's response.

    The form collects a comma-separated list of customer IDs and a toggle
    for fetching the customer data. On submit, both are POSTed as JSON to
    ``URL + "/predict"``. A 200 response is shown as a dataframe; any other
    status, or a failure to reach the service, is reported on the page.
    """
    with st.form(key='predictor'):
        customerID = st.text_input(
            label="Customer IDs, separate it with a comma"
        )
        fetch_customer_data = tog.st_toggle_switch(
            label="Fetch the customer data",
        )

        submitted = st.form_submit_button('Predict')

    if not submitted:
        return

    customerID_final = str_to_array(customerID)

    try:
        # Without a timeout the page would hang forever on a stalled backend.
        r = requests.post(
            URL + "/predict",
            json={
                "customerID": customerID_final,
                "fetch_customer_data": fetch_customer_data,
            },
            timeout=30,
        )
    except requests.RequestException as err:
        # Connection failures and timeouts are reported on the page rather
        # than surfacing as a raw traceback.
        st.write('Error contacting the prediction service: ', str(err))
        return

    if r.status_code == 200:
        st.dataframe(pd.DataFrame(r.json()))
    else:
        st.write('Error with status code ', str(r.status_code))


# Lets this page be run on its own; when app.py imports this module the
# guard is skipped and app.py calls run() itself.
if __name__ == '__main__':
    run()
|
requirements.txt
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
streamlit_toggle
|
3 |
+
|
4 |
+
pandas
|
5 |
+
matplotlib
|
6 |
+
seaborn
|
7 |
+
|
8 |
+
# pillow is the package name for PIL
|
9 |
+
pillow
|