Spaces:
Runtime error
Runtime error
Isabel Gwara
committed on
Commit
·
5c2fff1
1
Parent(s):
1b929ef
Update app.py
Browse files
app.py
CHANGED
@@ -70,14 +70,14 @@ for (colname, colval) in uncleaned_data.iteritems():
|
|
70 |
|
71 |
cat_value_dicts[colname] = new_dict
|
72 |
data[colname] = transformed_col_vals
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
def train_model():
|
80 |
-
# select features and
|
81 |
cols = len(data.columns)
|
82 |
num_features = cols - 1
|
83 |
x = data.iloc[: , :num_features]
|
@@ -91,10 +91,11 @@ def train_model():
|
|
91 |
model.fit(x_train, y_train.values.ravel())
|
92 |
y_pred = model.predict(x_test)
|
93 |
|
94 |
-
# save the model to file
|
95 |
with open('model.pkl', 'wb') as f:
|
96 |
pkl.dump(model, f)
|
97 |
|
|
|
98 |
with open('acc.txt', 'w+') as f:
|
99 |
acc = metrics.accuracy_score(y_test, y_pred)
|
100 |
f.write(str(round(acc * 100, 1)) + '%')
|
@@ -105,24 +106,28 @@ def train_model():
|
|
105 |
### rerun logic ###
|
106 |
### -------------------------------- ###
|
107 |
|
|
|
|
|
108 |
try:
|
109 |
with open('model.pkl', 'rb') as f:
|
110 |
model = pkl.load(f)
|
|
|
|
|
|
|
111 |
except FileNotFoundError as e:
|
112 |
model = train_model()
|
113 |
|
|
|
114 |
with open('acc.txt', 'r') as f:
|
115 |
acc = f.read()
|
116 |
|
117 |
|
118 |
-
|
119 |
-
|
120 |
### ------------------------------- ###
|
121 |
### interface creation ###
|
122 |
### ------------------------------- ###
|
123 |
|
124 |
-
|
125 |
-
#
|
126 |
def general_predictor(input_list):
|
127 |
features = []
|
128 |
|
@@ -140,9 +145,9 @@ def general_predictor(input_list):
|
|
140 |
|
141 |
def get_feat():
|
142 |
feats = [abs(x) for x in model.coef_[0]]
|
143 |
-
|
144 |
-
|
145 |
-
return
|
146 |
|
147 |
form = st.form('ml-inputs')
|
148 |
|
@@ -171,7 +176,7 @@ if form.form_submit_button("Submit to get your recommendation!"):
|
|
171 |
col1, col2 = st.columns(2)
|
172 |
col1.metric("Number of Different Possible Results", len(cat_value_dicts[final_colname]))
|
173 |
col2.metric("Model Accuracy", acc)
|
174 |
-
|
175 |
|
176 |
|
177 |
with open('info.md') as f:
|
|
|
70 |
|
71 |
cat_value_dicts[colname] = new_dict
|
72 |
data[colname] = transformed_col_vals
|
73 |
+
|
74 |
+
|
75 |
+
### -------------------------------- ###
|
76 |
+
### model training ###
|
77 |
+
### -------------------------------- ###
|
78 |
+
|
79 |
def train_model():
|
80 |
+
# select features and prediction; automatically selects last column as prediction
|
81 |
cols = len(data.columns)
|
82 |
num_features = cols - 1
|
83 |
x = data.iloc[: , :num_features]
|
|
|
91 |
model.fit(x_train, y_train.values.ravel())
|
92 |
y_pred = model.predict(x_test)
|
93 |
|
94 |
+
# save the model to file using the pickle package
|
95 |
with open('model.pkl', 'wb') as f:
|
96 |
pkl.dump(model, f)
|
97 |
|
98 |
+
# save model accuracy to a plain-text file (acc.txt) as a percentage string
|
99 |
with open('acc.txt', 'w+') as f:
|
100 |
acc = metrics.accuracy_score(y_test, y_pred)
|
101 |
f.write(str(round(acc * 100, 1)) + '%')
|
|
|
106 |
### rerun logic ###
|
107 |
### -------------------------------- ###
|
108 |
|
109 |
+
# check to see if this is the first time running the script,
|
110 |
+
# if the model has already been trained and saved, load it
|
111 |
try:
|
112 |
with open('model.pkl', 'rb') as f:
|
113 |
model = pkl.load(f)
|
114 |
+
|
115 |
+
# if this is the first time running the script, train the model
|
116 |
+
# and save it to the file model.pkl
|
117 |
except FileNotFoundError as e:
|
118 |
model = train_model()
|
119 |
|
120 |
+
# read the model accuracy from file
|
121 |
with open('acc.txt', 'r') as f:
|
122 |
acc = f.read()
|
123 |
|
124 |
|
|
|
|
|
125 |
### ------------------------------- ###
|
126 |
### interface creation ###
|
127 |
### ------------------------------- ###
|
128 |
|
129 |
+
# uses the logistic regression to predict for a generic number
|
130 |
+
# of features
|
131 |
def general_predictor(input_list):
|
132 |
features = []
|
133 |
|
|
|
145 |
|
146 |
def get_feat():
|
147 |
feats = [abs(x) for x in model.coef_[0]]
|
148 |
+
max_val = max(feats)
|
149 |
+
idx = feats.index(max_val)
|
150 |
+
return data.columns[idx]
|
151 |
|
152 |
form = st.form('ml-inputs')
|
153 |
|
|
|
176 |
col1, col2 = st.columns(2)
|
177 |
col1.metric("Number of Different Possible Results", len(cat_value_dicts[final_colname]))
|
178 |
col2.metric("Model Accuracy", acc)
|
179 |
+
st.metric("Most Important Question", get_feat())
|
180 |
|
181 |
|
182 |
with open('info.md') as f:
|