Spaces:
Runtime error
Runtime error
Ajeet Singh Raina
committed on
Commit
·
8cb8488
1
Parent(s):
bca0fbc
Added
Browse files- Dockerfile +9 -8
- Pipfile +18 -0
- requirements.txt +2 -0
- stream_app.py +84 -0
- train.py +131 -0
Dockerfile
CHANGED
@@ -1,14 +1,15 @@
|
|
1 |
-
|
2 |
-
# you will also find guides on how best to write your Dockerfile
|
3 |
|
4 |
-
|
5 |
|
6 |
-
WORKDIR /
|
7 |
|
8 |
-
COPY
|
9 |
|
10 |
-
RUN pip install
|
11 |
|
12 |
-
|
|
|
|
|
13 |
|
14 |
-
CMD ["
|
|
|
1 |
+
# Image that serves the Streamlit customer-churn app (stream_app.py).
FROM python:3.8.12-slim

# Bring pip up to date before installing the project dependencies.
RUN /usr/local/bin/python -m pip install --upgrade pip

WORKDIR /app

# Install dependencies from requirements.txt before copying the rest of the
# sources: this layer is then reused from the build cache whenever only
# application code changes. --no-cache-dir keeps pip's download cache out of
# the image.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Application code, model file and images.
COPY . .

# Port the Streamlit server is expected to listen on (8501 is Streamlit's
# documented default; confirm if server.port is overridden elsewhere).
EXPOSE 8501

# `docker run <image>` starts `streamlit run stream_app.py`; passing another
# script name as the container argument overrides only CMD.
ENTRYPOINT ["streamlit", "run"]
CMD ["stream_app.py"]
|
Pipfile
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[[source]]
|
2 |
+
url = "https://pypi.org/simple"
|
3 |
+
verify_ssl = true
|
4 |
+
name = "pypi"
|
5 |
+
|
6 |
+
[packages]
|
7 |
+
numpy = "*"
|
8 |
+
pandas = "*"
|
9 |
+
streamlit = "==0.87"
|
10 |
+
scikit-learn = "==0.24.2"
|
11 |
+
Pillow = "*"
|
12 |
+
click = "<8"
|
13 |
+
protobuf = "==3.20.1"
|
14 |
+
|
15 |
+
[dev-packages]
|
16 |
+
|
17 |
+
[requires]
|
18 |
+
python_version = "3.8"
|
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
# Presumably pinned so the pickled model is loaded with the scikit-learn
# release it was trained with (the Pipfile pins the same version).
scikit-learn==0.24.2
# NOTE(review): the Pipfile pins streamlit==0.87 (plus click<8 and
# protobuf==3.20.1) while this file, which the Dockerfile installs from,
# leaves streamlit unpinned - confirm which set is intended for the image.
streamlit
# Imported directly by stream_app.py / train.py; listed explicitly instead of
# relying on them arriving as transitive dependencies.
pandas
numpy
Pillow
|
stream_app.py
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pickle
|
2 |
+
import streamlit as st
|
3 |
+
import pandas as pd
|
4 |
+
from PIL import Image
|
5 |
+
# File name train.py produces for C = 1.0; it holds the pickled
# (vectorizer, model) tuple written by that script.
model_file = 'model_C=1.0.bin'

# NOTE: unpickling executes arbitrary code, so this file must come from a
# trusted source (i.e. be produced by this project's own training script).
with open(model_file, 'rb') as f_in:
    _bundle = pickle.load(f_in)

# Unpack after the file is closed; both names are used by main() below.
dv, model = _bundle
|
9 |
+
|
10 |
+
|
11 |
+
def _normalize_batch(data):
    """Return a copy of *data* normalised the same way train.py normalises its input.

    Column names and string cells are lower-cased with spaces replaced by
    underscores, and ``totalcharges`` (when present) is coerced to a number
    with unparseable entries replaced by 0. Already-normalised data passes
    through unchanged.
    """
    data = data.copy()
    data.columns = data.columns.str.lower().str.replace(' ', '_', regex=False)

    text_columns = list(data.dtypes[data.dtypes == 'object'].index)
    for column in text_columns:
        data[column] = data[column].str.lower().str.replace(' ', '_', regex=False)

    if 'totalcharges' in data.columns:
        data['totalcharges'] = pd.to_numeric(data['totalcharges'], errors='coerce').fillna(0)

    return data


def main():
    """Render the churn-prediction page.

    The sidebar selects between two modes:

    * "Online": the customer's attributes are entered through widgets and a
      single prediction is shown when the "Predict" button is pressed.
    * "Batch": a CSV file is uploaded and every row is scored; the uploaded
      table is shown with ``churn_prediction`` and ``risk_score`` columns added.

    Uses the module-level ``dv`` (vectorizer) and ``model`` loaded at import time.
    """
    image = Image.open('images/icone.png')
    image2 = Image.open('images/image.png')
    st.image(image, use_column_width=False)
    add_selectbox = st.sidebar.selectbox(
        "How would you like to predict?",
        ("Online", "Batch"))
    st.sidebar.info('This app is created to predict Customer Churn')
    st.sidebar.image(image2)
    st.title("Predicting Customer Churn")

    if add_selectbox == 'Online':
        # Option values use the lower-case / underscore spelling the model was
        # trained on (see the normalisation step in train.py).
        yes_no = ['yes', 'no']
        yes_no_internet = ['yes', 'no', 'no_internet_service']

        gender = st.selectbox('Gender:', ['male', 'female'])
        seniorcitizen = st.selectbox(' Customer is a senior citizen:', [0, 1])
        partner = st.selectbox(' Customer has a partner:', yes_no)
        dependents = st.selectbox(' Customer has dependents:', yes_no)
        phoneservice = st.selectbox(' Customer has phoneservice:', yes_no)
        multiplelines = st.selectbox(' Customer has multiplelines:', ['yes', 'no', 'no_phone_service'])
        internetservice = st.selectbox(' Customer has internetservice:', ['dsl', 'no', 'fiber_optic'])
        onlinesecurity = st.selectbox(' Customer has onlinesecurity:', yes_no_internet)
        onlinebackup = st.selectbox(' Customer has onlinebackup:', yes_no_internet)
        deviceprotection = st.selectbox(' Customer has deviceprotection:', yes_no_internet)
        techsupport = st.selectbox(' Customer has techsupport:', yes_no_internet)
        streamingtv = st.selectbox(' Customer has streamingtv:', yes_no_internet)
        streamingmovies = st.selectbox(' Customer has streamingmovies:', yes_no_internet)
        contract = st.selectbox(' Customer has a contract:', ['month-to-month', 'one_year', 'two_year'])
        paperlessbilling = st.selectbox(' Customer has a paperlessbilling:', yes_no)
        paymentmethod = st.selectbox('Payment Option:', ['bank_transfer_(automatic)', 'credit_card_(automatic)', 'electronic_check', 'mailed_check'])
        tenure = st.number_input('Number of months the customer has been with the current telco provider :', min_value=0, max_value=240, value=0)
        monthlycharges = st.number_input('Monthly charges :', min_value=0, max_value=240, value=0)
        # Total charges are not asked for; they are derived from the two inputs above.
        totalcharges = tenure * monthlycharges

        input_dict = {
            "gender": gender,
            "seniorcitizen": seniorcitizen,
            "partner": partner,
            "dependents": dependents,
            "phoneservice": phoneservice,
            "multiplelines": multiplelines,
            "internetservice": internetservice,
            "onlinesecurity": onlinesecurity,
            "onlinebackup": onlinebackup,
            "deviceprotection": deviceprotection,
            "techsupport": techsupport,
            "streamingtv": streamingtv,
            "streamingmovies": streamingmovies,
            "contract": contract,
            "paperlessbilling": paperlessbilling,
            "paymentmethod": paymentmethod,
            "tenure": tenure,
            "monthlycharges": monthlycharges,
            "totalcharges": totalcharges,
        }

        if st.button("Predict"):
            X = dv.transform([input_dict])
            # Column 1 of predict_proba is the probability of the positive class.
            y_pred = model.predict_proba(X)[0, 1]
            churn = y_pred >= 0.5
            st.success('Churn: {0}, Risk Score: {1}'.format(bool(churn), float(y_pred)))

    if add_selectbox == 'Batch':
        file_upload = st.file_uploader("Upload csv file for predictions", type=["csv"])
        if file_upload is not None:
            data = _normalize_batch(pd.read_csv(file_upload))
            if data.empty:
                # predict_proba cannot score zero rows; report instead of crashing.
                st.warning('The uploaded file contains no rows to score.')
            else:
                # The vectorizer expects one dict per customer, not a DataFrame.
                # Columns it was not fitted on are ignored by transform().
                X = dv.transform(data.to_dict(orient='records'))
                y_pred = model.predict_proba(X)[:, 1]
                st.write(data.assign(churn_prediction=y_pred >= 0.5, risk_score=y_pred))


if __name__ == '__main__':
    main()
|
train.py
ADDED
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# coding: utf-8
|
3 |
+
|
4 |
+
import pickle
|
5 |
+
|
6 |
+
import pandas as pd
|
7 |
+
import numpy as np
|
8 |
+
|
9 |
+
from sklearn.model_selection import train_test_split
|
10 |
+
from sklearn.model_selection import KFold
|
11 |
+
|
12 |
+
from sklearn.feature_extraction import DictVectorizer
|
13 |
+
from sklearn.linear_model import LogisticRegression
|
14 |
+
from sklearn.metrics import roc_auc_score
|
15 |
+
|
16 |
+
# --- parameters ---

C = 1.0  # regularisation value passed to LogisticRegression via train()
n_splits = 5  # number of cross-validation folds
output_file = f'model_C={C}.bin'

# --- data preparation ---

df = pd.read_csv('WA_Fn-UseC_-Telco-Customer-Churn.csv')

# Normalise the header: lower-case names, underscores instead of spaces.
df.columns = df.columns.str.lower().str.replace(' ', '_')

# Apply the same normalisation to the cells of every object-dtype column.
is_object_column = df.dtypes == 'object'
categorical_columns = list(df.dtypes[is_object_column].index)

for column in categorical_columns:
    lowered = df[column].str.lower()
    df[column] = lowered.str.replace(' ', '_')

# Entries that cannot be parsed as numbers become NaN and are then set to 0.
df['totalcharges'] = pd.to_numeric(df['totalcharges'], errors='coerce').fillna(0)

# Binary target: 1 where churn is 'yes', otherwise 0.
df['churn'] = (df['churn'] == 'yes').astype(int)

# Hold out 20% of the rows as the final test set.
df_full_train, df_test = train_test_split(df, test_size=0.2, random_state=1)

# Feature columns fed to the vectorizer by train() and predict().
numerical = ['tenure', 'monthlycharges', 'totalcharges']

categorical = [
    'gender', 'seniorcitizen', 'partner', 'dependents',
    'phoneservice', 'multiplelines', 'internetservice',
    'onlinesecurity', 'onlinebackup', 'deviceprotection',
    'techsupport', 'streamingtv', 'streamingmovies',
    'contract', 'paperlessbilling', 'paymentmethod',
]
|
60 |
+
|
61 |
+
|
62 |
+
# training
|
63 |
+
|
64 |
+
def train(df_train, y_train, C=1.0):
    """Fit a vectorizer and a logistic-regression model on the training data.

    Args:
        df_train: frame containing at least the ``categorical`` and
            ``numerical`` feature columns.
        y_train: target values aligned with the rows of ``df_train``.
        C: value forwarded to ``LogisticRegression(C=...)``.

    Returns:
        A ``(DictVectorizer, LogisticRegression)`` tuple, both fitted.
    """
    feature_columns = categorical + numerical
    records = df_train[feature_columns].to_dict(orient='records')

    # Dense output so the matrix can be handed straight to the classifier.
    vectorizer = DictVectorizer(sparse=False)
    features = vectorizer.fit_transform(records)

    # fit() returns the estimator itself, so construction and fitting chain.
    classifier = LogisticRegression(C=C, max_iter=1000).fit(features, y_train)

    return vectorizer, classifier
|
74 |
+
|
75 |
+
|
76 |
+
def predict(df, dv, model):
    """Score every row of ``df`` with a fitted vectorizer/model pair.

    Args:
        df: frame containing at least the ``categorical`` and ``numerical``
            feature columns.
        dv: fitted vectorizer exposing ``transform``.
        model: fitted classifier exposing ``predict_proba``.

    Returns:
        Column 1 of ``model.predict_proba`` for each row, i.e. the
        positive-class probability.
    """
    feature_columns = categorical + numerical
    records = df[feature_columns].to_dict(orient='records')

    probabilities = model.predict_proba(dv.transform(records))
    return probabilities[:, 1]
|
83 |
+
|
84 |
+
|
85 |
+
# validation: K-fold cross-validation on the full training split

print(f'doing validation with C={C}')

kfold = KFold(n_splits=n_splits, shuffle=True, random_state=1)

scores = []

# enumerate() supplies the fold number instead of a hand-maintained counter.
for fold, (train_idx, val_idx) in enumerate(kfold.split(df_full_train)):
    df_train = df_full_train.iloc[train_idx]
    df_val = df_full_train.iloc[val_idx]

    y_train = df_train.churn.values
    y_val = df_val.churn.values

    dv, model = train(df_train, y_train, C=C)
    y_pred = predict(df_val, dv, model)

    auc = roc_auc_score(y_val, y_pred)
    scores.append(auc)

    print(f'auc on fold {fold} is {auc}')

# Mean and standard deviation of the per-fold AUC values.
print('validation results:')
print('C=%s %.3f +- %.3f' % (C, np.mean(scores), np.std(scores)))
|
113 |
+
|
114 |
+
# training the final model on the whole training split and scoring the held-out test set

print('training the final model')

# Use the configured C (not a literal 1.0) so the fitted model always matches
# both the validation run above and the value encoded in output_file.
dv, model = train(df_full_train, df_full_train.churn.values, C=C)
y_pred = predict(df_test, dv, model)

y_test = df_test.churn.values
auc = roc_auc_score(y_test, y_pred)

print(f'auc={auc}')

# Save the model: the vectorizer and classifier are pickled together as one
# tuple, which is the shape stream_app.py unpacks when it loads the file.

with open(output_file, 'wb') as f_out:
    pickle.dump((dv, model), f_out)

print(f'the model is saved to {output_file}')
|