Ajeet Singh Raina committed on
Commit
8cb8488
·
1 Parent(s): bca0fbc
Files changed (5) hide show
  1. Dockerfile +9 -8
  2. Pipfile +18 -0
  3. requirements.txt +2 -0
  4. stream_app.py +84 -0
  5. train.py +131 -0
Dockerfile CHANGED
@@ -1,14 +1,15 @@
1
- # read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
2
- # you will also find guides on how best to write your Dockerfile
3
 
4
- FROM python:3.9
5
 
6
- WORKDIR /code
7
 
8
- COPY ./requirements.txt /code/requirements.txt
9
 
10
- RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
11
 
12
- COPY . .
 
 
13
 
14
- CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
 
1
+ FROM python:3.8.12-slim
 
2
 
3
+ RUN /usr/local/bin/python -m pip install --upgrade pip
4
 
5
+ WORKDIR /app
6
 
7
+ COPY . .
8
 
9
+ RUN pip install -r requirements.txt
10
 
11
+ EXPOSE 8501
12
+
13
+ ENTRYPOINT ["streamlit", "run"]
14
 
15
+ CMD ["stream_app.py"]
Pipfile ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [[source]]
2
+ url = "https://pypi.org/simple"
3
+ verify_ssl = true
4
+ name = "pypi"
5
+
6
+ [packages]
7
+ numpy = "*"
8
+ pandas = "*"
9
+ streamlit = "==0.87"
10
+ scikit-learn = "==0.24.2"
11
+ Pillow = "*"
12
+ click = "<8"
13
+ protobuf = "==3.20.1"
14
+
15
+ [dev-packages]
16
+
17
+ [requires]
18
+ python_version = "3.8"
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ scikit-learn==0.24.2
2
+ streamlit
stream_app.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ import streamlit as st
3
+ import pandas as pd
4
+ from PIL import Image
5
+ model_file = 'model_C=1.0.bin'
6
+
7
+ with open(model_file, 'rb') as f_in:
8
+ dv, model = pickle.load(f_in)
9
+
10
+
11
def main():
    """Render the Streamlit churn-prediction app.

    The sidebar lets the user choose between two modes:

    * ``Online`` - the customer attributes are entered through widgets and a
      single prediction is shown when the "Predict" button is pressed.
    * ``Batch`` - a CSV file is uploaded and one prediction is produced for
      every row of the file.

    Relies on the module-level ``dv`` (vectorizer) and ``model`` loaded from
    the pickle file at import time.
    """
    image = Image.open('images/icone.png')
    image2 = Image.open('images/image.png')
    st.image(image,use_column_width=False)
    add_selectbox = st.sidebar.selectbox(
        "How would you like to predict?",
        ("Online", "Batch"))
    st.sidebar.info('This app is created to predict Customer Churn')
    st.sidebar.image(image2)
    st.title("Predicting Customer Churn")

    if add_selectbox == 'Online':
        # Widgets are created in display order; do not reorder them.
        gender = st.selectbox('Gender:', ['male', 'female'])
        seniorcitizen = st.selectbox(' Customer is a senior citizen:', [0, 1])
        partner = st.selectbox(' Customer has a partner:', ['yes', 'no'])
        dependents = st.selectbox(' Customer has dependents:', ['yes', 'no'])
        phoneservice = st.selectbox(' Customer has phoneservice:', ['yes', 'no'])
        multiplelines = st.selectbox(' Customer has multiplelines:', ['yes', 'no', 'no_phone_service'])
        internetservice = st.selectbox(' Customer has internetservice:', ['dsl', 'no', 'fiber_optic'])
        onlinesecurity = st.selectbox(' Customer has onlinesecurity:', ['yes', 'no', 'no_internet_service'])
        onlinebackup = st.selectbox(' Customer has onlinebackup:', ['yes', 'no', 'no_internet_service'])
        deviceprotection = st.selectbox(' Customer has deviceprotection:', ['yes', 'no', 'no_internet_service'])
        techsupport = st.selectbox(' Customer has techsupport:', ['yes', 'no', 'no_internet_service'])
        streamingtv = st.selectbox(' Customer has streamingtv:', ['yes', 'no', 'no_internet_service'])
        streamingmovies = st.selectbox(' Customer has streamingmovies:', ['yes', 'no', 'no_internet_service'])
        contract = st.selectbox(' Customer has a contract:', ['month-to-month', 'one_year', 'two_year'])
        paperlessbilling = st.selectbox(' Customer has a paperlessbilling:', ['yes', 'no'])
        paymentmethod = st.selectbox('Payment Option:', ['bank_transfer_(automatic)', 'credit_card_(automatic)', 'electronic_check' ,'mailed_check'])
        tenure = st.number_input('Number of months the customer has been with the current telco provider :', min_value=0, max_value=240, value=0)
        monthlycharges = st.number_input('Monthly charges :', min_value=0, max_value=240, value=0)
        # Total charges are derived from the two numeric inputs, not asked for.
        totalcharges = tenure*monthlycharges
        input_dict = {
            "gender": gender,
            "seniorcitizen": seniorcitizen,
            "partner": partner,
            "dependents": dependents,
            "phoneservice": phoneservice,
            "multiplelines": multiplelines,
            "internetservice": internetservice,
            "onlinesecurity": onlinesecurity,
            "onlinebackup": onlinebackup,
            "deviceprotection": deviceprotection,
            "techsupport": techsupport,
            "streamingtv": streamingtv,
            "streamingmovies": streamingmovies,
            "contract": contract,
            "paperlessbilling": paperlessbilling,
            "paymentmethod": paymentmethod,
            "tenure": tenure,
            "monthlycharges": monthlycharges,
            "totalcharges": totalcharges,
        }

        if st.button("Predict"):
            X = dv.transform([input_dict])
            y_pred = model.predict_proba(X)[0, 1]
            churn = y_pred >= 0.5
            output_prob = float(y_pred)
            output = bool(churn)
            st.success('Churn: {0}, Risk Score: {1}'.format(output, output_prob))

    if add_selectbox == 'Batch':
        file_upload = st.file_uploader("Upload csv file for predictions", type=["csv"])
        if file_upload is not None:
            data = pd.read_csv(file_upload)
            if data.empty:
                # Nothing to score; avoid handing an empty batch to the model.
                st.warning('The uploaded file contains no rows.')
            else:
                # The vectorizer expects one dict per sample, so convert each
                # CSV row into a record instead of passing the DataFrame
                # itself as a single "sample".
                # NOTE(review): assumes the CSV already uses the same
                # lower-case column names and category values as the online
                # form - confirm against the training pipeline.
                records = data.to_dict(orient='records')
                X = dv.transform(records)
                # Probability of the positive (churn) class for every row.
                y_pred = model.predict_proba(X)[:, 1]
                churn = y_pred >= 0.5
                st.write(pd.DataFrame({'churn': churn, 'risk_score': y_pred}))
82
+
83
+ if __name__ == '__main__':
84
+ main()
train.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # coding: utf-8
3
+
4
+ import pickle
5
+
6
+ import pandas as pd
7
+ import numpy as np
8
+
9
+ from sklearn.model_selection import train_test_split
10
+ from sklearn.model_selection import KFold
11
+
12
+ from sklearn.feature_extraction import DictVectorizer
13
+ from sklearn.linear_model import LogisticRegression
14
+ from sklearn.metrics import roc_auc_score
15
+
16
+ # parameters
17
+
18
+ C = 1.0
19
+ n_splits = 5
20
+ output_file = f'model_C={C}.bin'
21
+
22
+ # data preparation
23
+
24
+ df = pd.read_csv('WA_Fn-UseC_-Telco-Customer-Churn.csv')
25
+
26
+ df.columns = df.columns.str.lower().str.replace(' ', '_')
27
+
28
+ categorical_columns = list(df.dtypes[df.dtypes == 'object'].index)
29
+
30
+ for c in categorical_columns:
31
+ df[c] = df[c].str.lower().str.replace(' ', '_')
32
+
33
+ df.totalcharges = pd.to_numeric(df.totalcharges, errors='coerce')
34
+ df.totalcharges = df.totalcharges.fillna(0)
35
+
36
+ df.churn = (df.churn == 'yes').astype(int)
37
+
38
+ df_full_train, df_test = train_test_split(df, test_size=0.2, random_state=1)
39
+
40
+ numerical = ['tenure', 'monthlycharges', 'totalcharges']
41
+
42
+ categorical = [
43
+ 'gender',
44
+ 'seniorcitizen',
45
+ 'partner',
46
+ 'dependents',
47
+ 'phoneservice',
48
+ 'multiplelines',
49
+ 'internetservice',
50
+ 'onlinesecurity',
51
+ 'onlinebackup',
52
+ 'deviceprotection',
53
+ 'techsupport',
54
+ 'streamingtv',
55
+ 'streamingmovies',
56
+ 'contract',
57
+ 'paperlessbilling',
58
+ 'paymentmethod',
59
+ ]
60
+
61
+
62
+ # training
63
+
64
def train(df_train, y_train, C=1.0):
    """Fit the feature vectorizer and a logistic-regression model.

    The columns listed in the module-level ``categorical`` and ``numerical``
    lists are taken from ``df_train``, turned into per-row dicts and
    vectorized with a dense ``DictVectorizer``; a ``LogisticRegression``
    with regularisation parameter ``C`` is then fitted on the result.

    Returns the fitted ``(dv, model)`` pair.
    """
    feature_columns = categorical + numerical
    records = df_train[feature_columns].to_dict(orient='records')

    dv = DictVectorizer(sparse=False)
    model = LogisticRegression(C=C, max_iter=1000)

    # Vectorizer is fitted on the training records only.
    model.fit(dv.fit_transform(records), y_train)

    return dv, model
74
+
75
+
76
def predict(df, dv, model):
    """Return the positive-class probability for every row of ``df``.

    Uses the already fitted vectorizer ``dv`` to transform the columns named
    in the module-level ``categorical`` and ``numerical`` lists, then takes
    column 1 of ``model.predict_proba``.
    """
    feature_columns = categorical + numerical
    records = df[feature_columns].to_dict(orient='records')

    probabilities = model.predict_proba(dv.transform(records))
    return probabilities[:, 1]
83
+
84
+
85
+ # validation
86
+
87
print(f'doing validation with C={C}')

kfold = KFold(n_splits=n_splits, shuffle=True, random_state=1)

scores = []

# One model is trained per fold and scored on that fold's held-out part.
for fold, (train_idx, val_idx) in enumerate(kfold.split(df_full_train)):
    df_train = df_full_train.iloc[train_idx]
    df_val = df_full_train.iloc[val_idx]

    y_train = df_train.churn.values
    y_val = df_val.churn.values

    dv, model = train(df_train, y_train, C=C)
    y_pred = predict(df_val, dv, model)

    auc = roc_auc_score(y_val, y_pred)
    scores.append(auc)

    print(f'auc on fold {fold} is {auc}')

# Summary: mean and standard deviation of the per-fold AUC values.
mean_auc = np.mean(scores)
std_auc = np.std(scores)

print('validation results:')
print('C=%s %.3f +- %.3f' % (C, mean_auc, std_auc))
113
+
114
+ # training the final model
115
+
116
print('training the final model')

# Train on the full training split with the configured C (not a hard-coded
# 1.0), so the model matches both the cross-validation above and the value
# embedded in the output file name.
dv, model = train(df_full_train, df_full_train.churn.values, C=C)
y_pred = predict(df_test, dv, model)

# Evaluate once on the held-out test split.
y_test = df_test.churn.values
auc = roc_auc_score(y_test, y_pred)

print(f'auc={auc}')
125
+
126
+ # Save the model
127
+
128
+ with open(output_file, 'wb') as f_out:
129
+ pickle.dump((dv, model), f_out)
130
+
131
+ print(f'the model is saved to {output_file}')