Isabel Gwara committed on
Commit
5c2fff1
·
1 Parent(s): 1b929ef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -16
app.py CHANGED
@@ -70,14 +70,14 @@ for (colname, colval) in uncleaned_data.iteritems():
70
 
71
  cat_value_dicts[colname] = new_dict
72
  data[colname] = transformed_col_vals
73
-
74
-
75
- ### -------------------------------- ###
76
- ### model training ###
77
- ### -------------------------------- ###
78
-
79
  def train_model():
80
- # select features and predicton; automatically selects last column as prediction
81
  cols = len(data.columns)
82
  num_features = cols - 1
83
  x = data.iloc[: , :num_features]
@@ -91,10 +91,11 @@ def train_model():
91
  model.fit(x_train, y_train.values.ravel())
92
  y_pred = model.predict(x_test)
93
 
94
- # save the model to file
95
  with open('model.pkl', 'wb') as f:
96
  pkl.dump(model, f)
97
 
 
98
  with open('acc.txt', 'w+') as f:
99
  acc = metrics.accuracy_score(y_test, y_pred)
100
  f.write(str(round(acc * 100, 1)) + '%')
@@ -105,24 +106,28 @@ def train_model():
105
  ### rerun logic ###
106
  ### -------------------------------- ###
107
 
 
 
108
  try:
109
  with open('model.pkl', 'rb') as f:
110
  model = pkl.load(f)
 
 
 
111
  except FileNotFoundError as e:
112
  model = train_model()
113
 
 
114
  with open('acc.txt', 'r') as f:
115
  acc = f.read()
116
 
117
 
118
-
119
-
120
  ### ------------------------------- ###
121
  ### interface creation ###
122
  ### ------------------------------- ###
123
 
124
-
125
- # predictor for generic number of features
126
  def general_predictor(input_list):
127
  features = []
128
 
@@ -140,9 +145,9 @@ def general_predictor(input_list):
140
 
141
  def get_feat():
142
  feats = [abs(x) for x in model.coef_[0]]
143
- # max_val = max(feats)
144
- # idx = feats.index(max_val)
145
- return str(feats) # data.columns[idx]
146
 
147
  form = st.form('ml-inputs')
148
 
@@ -171,7 +176,7 @@ if form.form_submit_button("Submit to get your recommendation!"):
171
  col1, col2 = st.columns(2)
172
  col1.metric("Number of Different Possible Results", len(cat_value_dicts[final_colname]))
173
  col2.metric("Model Accuracy", acc)
174
- # st.metric("Most Important Question", get_feat())
175
 
176
 
177
  with open('info.md') as f:
 
70
 
71
  cat_value_dicts[colname] = new_dict
72
  data[colname] = transformed_col_vals
73
+
74
+
75
+ ### -------------------------------- ###
76
+ ### model training ###
77
+ ### -------------------------------- ###
78
+
79
  def train_model():
80
+ # select features and prediction; automatically selects last column as prediction
81
  cols = len(data.columns)
82
  num_features = cols - 1
83
  x = data.iloc[: , :num_features]
 
91
  model.fit(x_train, y_train.values.ravel())
92
  y_pred = model.predict(x_test)
93
 
94
+ # save the model to file using the pickle package
95
  with open('model.pkl', 'wb') as f:
96
  pkl.dump(model, f)
97
 
98
+ # save model accuracy (as a percentage string) to the plain-text file acc.txt
99
  with open('acc.txt', 'w+') as f:
100
  acc = metrics.accuracy_score(y_test, y_pred)
101
  f.write(str(round(acc * 100, 1)) + '%')
 
106
  ### rerun logic ###
107
  ### -------------------------------- ###
108
 
109
+ # check to see if this is the first time running the script,
110
+ # if the model has already been trained and saved, load it
111
  try:
112
  with open('model.pkl', 'rb') as f:
113
  model = pkl.load(f)
114
+
115
+ # if this is the first time running the script, train the model
116
+ # and save it to the file model.pkl
117
  except FileNotFoundError as e:
118
  model = train_model()
119
 
120
+ # read the model accuracy from file
121
  with open('acc.txt', 'r') as f:
122
  acc = f.read()
123
 
124
 
 
 
125
  ### ------------------------------- ###
126
  ### interface creation ###
127
  ### ------------------------------- ###
128
 
129
+ # uses the logistic regression to predict for a generic number
130
+ # of features
131
  def general_predictor(input_list):
132
  features = []
133
 
 
145
 
146
  def get_feat():
147
  feats = [abs(x) for x in model.coef_[0]]
148
+ max_val = max(feats)
149
+ idx = feats.index(max_val)
150
+ return data.columns[idx]
151
 
152
  form = st.form('ml-inputs')
153
 
 
176
  col1, col2 = st.columns(2)
177
  col1.metric("Number of Different Possible Results", len(cat_value_dicts[final_colname]))
178
  col2.metric("Model Accuracy", acc)
179
+ st.metric("Most Important Question", get_feat())
180
 
181
 
182
  with open('info.md') as f: