Isabel Gwara committed
Commit 93ba032 · 1 Parent(s): 9ea91db

Update app.py

Files changed (1):
  1. app.py +71 -53
app.py CHANGED
@@ -3,11 +3,14 @@
  ### ----------------------------- ###

  import streamlit as st
+ import pickle as pkl
  import pandas as pd
  import numpy as np
  from sklearn.model_selection import train_test_split
  from sklearn.linear_model import LogisticRegression
  from sklearn import metrics
+ from os.path import exists
+


  ### ----------------------------- ###
@@ -25,69 +28,84 @@ st.subheader('Feeling like you might need a better coping strategy? Take the qui
  ### data transformation ###
  ### ------------------------------ ###

- # load dataset
- uncleaned_data = pd.read_csv('data.csv')
-
- # remove timestamp from dataset (always first column)
- uncleaned_data = uncleaned_data.iloc[: , 1:]
- data = pd.DataFrame()
-
- # keep track of which columns are categorical and what
- # those columns' value mappings are
- # structure: {colname1: {...}, colname2: {...} }
- cat_value_dicts = {}
- final_colname = uncleaned_data.columns[len(uncleaned_data.columns) - 1]
-
- # for each column...
- for (colname, colval) in uncleaned_data.iteritems():
-
-     # check if col is already a number; if so, add col directly
-     # to new dataframe and skip to next column
-     if isinstance(colval.values[0], (np.integer, float)):
-         data[colname] = uncleaned_data[colname].copy()
-         continue
-
-     # structure: {0: "lilac", 1: "blue", ...}
-     new_dict = {}
-     val = 0 # first index per column
-     transformed_col_vals = [] # new numeric datapoints
-
-     # if not, for each item in that column...
-     for (row, item) in enumerate(colval.values):
-
-         # if item is not in this col's dict...
-         if item not in new_dict:
-             new_dict[item] = val
-             val += 1
+ def load_dataset():
+     # load dataset
+     uncleaned_data = pd.read_csv('data.csv')
+
+     # remove timestamp from dataset (always first column)
+     uncleaned_data = uncleaned_data.iloc[: , 1:]
+     data = pd.DataFrame()
+
+     # keep track of which columns are categorical and what
+     # those columns' value mappings are
+     # structure: {colname1: {...}, colname2: {...} }
+     cat_value_dicts = {}
+     final_colname = uncleaned_data.columns[len(uncleaned_data.columns) - 1]
+
+     # for each column...
+     for (colname, colval) in uncleaned_data.iteritems():
+
+         # check if col is already a number; if so, add col directly
+         # to new dataframe and skip to next column
+         if isinstance(colval.values[0], (np.integer, float)):
+             data[colname] = uncleaned_data[colname].copy()
+             continue
+
+         # structure: {0: "lilac", 1: "blue", ...}
+         new_dict = {}
+         val = 0 # first index per column
+         transformed_col_vals = [] # new numeric datapoints
+
+         # if not, for each item in that column...
+         for (row, item) in enumerate(colval.values):
+
+             # if item is not in this col's dict...
+             if item not in new_dict:
+                 new_dict[item] = val
+                 val += 1
+
+             # then add numerical value to transformed dataframe
+             transformed_col_vals.append(new_dict[item])

-         # then add numerical value to transformed dataframe
-         transformed_col_vals.append(new_dict[item])
+         # reverse dictionary only for final col (0, 1) => (vals)
+         if colname == final_colname:
+             new_dict = {value : key for (key, value) in new_dict.items()}

-     # reverse dictionary only for final col (0, 1) => (vals)
-     if colname == final_colname:
-         new_dict = {value : key for (key, value) in new_dict.items()}
-
-     cat_value_dicts[colname] = new_dict
-     data[colname] = transformed_col_vals
+         cat_value_dicts[colname] = new_dict
+         data[colname] = transformed_col_vals


  ### -------------------------------- ###
  ### model training ###
  ### -------------------------------- ###

- # select features and prediction; automatically selects last column as prediction
- cols = len(data.columns)
- num_features = cols - 1
- x = data.iloc[: , :num_features]
- y = data.iloc[: , num_features:]
+ def train_model():
+     # select features and prediction; automatically selects last column as prediction
+     cols = len(data.columns)
+     num_features = cols - 1
+     x = data.iloc[: , :num_features]
+     y = data.iloc[: , num_features:]
+
+     # split data into training and testing sets
+     x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)
+
+     # instantiate the model (using default parameters)
+     model = LogisticRegression()
+     model.fit(x_train, y_train.values.ravel())
+     y_pred = model.predict(x_test)
+
+     # save the model to file
+     pkl.dump(model, 'model.pkl')
+
+ ### -------------------------------- ###
+ ### rerun logic ###
+ ### -------------------------------- ###

- # split data into training and testing sets
- x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)
+ if not os.path.exists('model.pkl'):
+     load_dataset()
+     train_model()

- # instantiate the model (using default parameters)
- model = LogisticRegression()
- model.fit(x_train, y_train.values.ravel())
- y_pred = model.predict(x_test)
+ model = pkl.load('model.pkl')


  ### ------------------------------- ###
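
For reference, the per-column encoding inside load_dataset() gives each previously unseen answer string the next free integer index, and only for the final (label) column flips that mapping so a numeric prediction can be translated back into the original answer text. A small worked example of that loop, using invented answer strings (the colour names come from the '{0: "lilac", 1: "blue", ...}' comment in the code):

# toy illustration of the string -> index encoding in load_dataset();
# the answer strings below are made up for this example
answers = ['lilac', 'blue', 'lilac', 'green', 'blue']

new_dict = {}              # maps string -> index, e.g. {'lilac': 0, 'blue': 1, 'green': 2}
transformed_col_vals = []  # numeric replacements for the column
val = 0

for item in answers:
    # the first time a string appears, assign it the next free index
    if item not in new_dict:
        new_dict[item] = val
        val += 1
    # every row is stored as its index
    transformed_col_vals.append(new_dict[item])

print(transformed_col_vals)  # [0, 1, 0, 2, 1]

# for the final (label) column only, the dict is reversed (index -> string)
# so the model's numeric output can be shown as the original answer text
new_dict = {value: key for (key, value) in new_dict.items()}
print(new_dict[2])  # 'green'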
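
As committed, the rerun block depends on a few details: pickle.dump and pickle.load operate on open binary file objects rather than on path strings, the new import brings in exists while os itself is not among the imports shown, and data and cat_value_dicts are local to load_dataset(), so train_model() cannot see them. A minimal, self-contained sketch of one way those pieces could be wired together; the hard-coded dataframe and its q1/q2/label column names are stand-ins for data.csv and are not part of the commit:

# runnable sketch only, not the committed app.py
import pickle as pkl
from os.path import exists

import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

def load_dataset():
    # stand-in for the real cleaning code; returning the dataframe means
    # train_model() no longer relies on a variable that is local elsewhere
    return pd.DataFrame({
        'q1':    [0, 1, 2, 1, 0, 2, 1, 0],
        'q2':    [1, 1, 0, 0, 1, 0, 1, 0],
        'label': [0, 1, 1, 0, 0, 1, 1, 0],
    })

def train_model(data):
    x = data.iloc[:, :-1]
    y = data.iloc[:, -1:]
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)

    model = LogisticRegression()
    model.fit(x_train, y_train.values.ravel())

    # pickle.dump writes to an open binary file object, not a path string
    with open('model.pkl', 'wb') as f:
        pkl.dump(model, f)

# train once; on later runs the guard skips straight to loading
if not exists('model.pkl'):
    train_model(load_dataset())

# pickle.load likewise reads from an open binary file object
with open('model.pkl', 'rb') as f:
    model = pkl.load(f)

print(model.predict(pd.DataFrame({'q1': [1, 2], 'q2': [0, 1]})))

With this shape, training runs only while model.pkl is missing; every later execution of the script just unpickles the saved model. Current Streamlit releases also provide st.cache_resource, which keeps a trained model in memory across reruns without writing a pickle file at all.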