Spaces:
Runtime error
Runtime error
File size: 5,498 Bytes
eb1f440 93ba032 eb1f440 9ce0485 4b388b8 ed0c441 616896b ed0c441 eb1f440 c1478a9 93ba032 c1478a9 93ba032 c1478a9 93ba032 c1478a9 93ba032 c1478a9 93ba032 c1478a9 93ba032 c1478a9 93ba032 c1478a9 93ba032 c1478a9 eb1f440 c1478a9 eb1f440 c1478a9 5c2fff1 c1478a9 5c2fff1 93ba032 42b052d 93ba032 5c2fff1 5f22042 b004973 4b388b8 5c2fff1 7b32eb4 4b388b8 93ba032 eb1f440 5c2fff1 b21d5e0 7415061 848851d 5c2fff1 b21d5e0 4b388b8 eb1f440 5c2fff1 92d7932 b21d5e0 eb1f440 5c2fff1 8f299bb eb1f440 8f299bb eb1f440 8f299bb eb1f440 8f299bb eb1f440 4b51a5a eb1f440 52855ba 5c2fff1 52855ba cb33fad f5761ae cb33fad 58fbefd eb1f440 58fbefd eb1f440 58fbefd 680cff7 eb1f440 0a00489 4b51a5a eb1f440 680cff7 0e8c5c1 9713997 0b93644 df169d5 32aefbe 8e2b9e5 220313a b7d5afb e2e4921 cb33fad f5761ae |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 |
### ----------------------------- ###
### libraries ###
### ----------------------------- ###
import streamlit as st
import pickle as pkl
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
### ----------------------------- ###
### interface setup ###
### ----------------------------- ###
# Inject the project stylesheet into the page: read styles.css in full,
# then hand it to Streamlit wrapped in a <style> tag (raw HTML must be
# explicitly allowed for the tag to be emitted).
with open('styles.css') as css_file:
    css_rules = css_file.read()
st.markdown(f'<style>{css_rules}</style>', unsafe_allow_html=True)
### ------------------------------ ###
### data transformation ###
### ------------------------------ ###
# load dataset
uncleaned_data = pd.read_csv('data.csv')

# remove timestamp from dataset (always first column)
uncleaned_data = uncleaned_data.iloc[:, 1:]
data = pd.DataFrame()

# keep track of which columns are categorical and what
# those columns' value mappings are
# structure: {colname1: {...}, colname2: {...} }
cat_value_dicts = {}

# the last column is the prediction target
final_colname = uncleaned_data.columns[-1]

# for each column...
# NOTE: DataFrame.iteritems() was removed in pandas 2.0; items() yields the
# same (column name, Series) pairs and exists in older releases as well.
for (colname, colval) in uncleaned_data.items():
    # check if col is already a number; if so, add col directly
    # to new dataframe and skip to next column
    if isinstance(colval.values[0], (np.integer, float)):
        data[colname] = uncleaned_data[colname].copy()
        continue

    # structure while encoding: {"lilac": 0, "blue": 1, ...}
    new_dict = {}
    transformed_col_vals = []  # new numeric datapoints

    # if not, for each item in that column...
    for item in colval.values:
        # first time this value is seen: give it the next free code
        # (codes are assigned 0, 1, 2, ... in order of first appearance)
        if item not in new_dict:
            new_dict[item] = len(new_dict)

        # then add numerical value to transformed dataframe
        transformed_col_vals.append(new_dict[item])

    # reverse dictionary only for final col (0, 1) => (vals), so a predicted
    # code can be mapped back to its original label
    if colname == final_colname:
        new_dict = {value: key for (key, value) in new_dict.items()}

    cat_value_dicts[colname] = new_dict
    data[colname] = transformed_col_vals
### -------------------------------- ###
### model training ###
### -------------------------------- ###
def train_model():
    """Fit a logistic regression on ``data`` and persist it to disk.

    Every column of the module-level ``data`` frame except the last is used
    as a feature; the last column is the prediction target. A quarter of the
    rows is held out to measure accuracy.

    Side effects:
        Writes the fitted model to ``model.pkl`` (pickle) and the held-out
        accuracy, formatted as a percentage with one decimal, to ``acc.txt``.

    Returns:
        The fitted ``LogisticRegression`` instance.
    """
    # position of the target column == number of feature columns
    target_pos = len(data.columns) - 1
    features = data.iloc[:, :target_pos]
    target = data.iloc[:, target_pos:]

    # hold out 25% of the rows for evaluation
    x_train, x_test, y_train, y_test = train_test_split(
        features, target, test_size=0.25
    )

    # default hyper-parameters; fit() wants a flat 1-D target array
    classifier = LogisticRegression()
    classifier.fit(x_train, y_train.values.ravel())
    y_pred = classifier.predict(x_test)

    # persist the fitted model
    with open('model.pkl', 'wb') as model_file:
        pkl.dump(classifier, model_file)

    # persist the accuracy as text, e.g. "87.5%"
    with open('acc.txt', 'w+') as acc_file:
        acc = metrics.accuracy_score(y_test, y_pred)
        percentage = round(acc * 100, 1)
        acc_file.write(str(percentage) + '%')

    return classifier
### -------------------------------- ###
### rerun logic ###
### -------------------------------- ###
# Reuse the model and accuracy saved by a previous run when both files are
# present; otherwise train from scratch (train_model writes both model.pkl
# and acc.txt).
# NOTE: model.pkl is unpickled as-is -- only deploy a pickle that this app
# produced itself, since unpickling can execute arbitrary code.
try:
    with open('model.pkl', 'rb') as f:
        model = pkl.load(f)
    # read the model accuracy from file
    with open('acc.txt', 'r') as f:
        acc = f.read()
except FileNotFoundError:
    # first run, or one of the two cache files is missing: retrain so the
    # model and the reported accuracy come from the same training run
    model = train_model()
    with open('acc.txt', 'r') as f:
        acc = f.read()
### ------------------------------- ###
### interface creation ###
### ------------------------------- ###
# uses the logistic regression to predict for a generic number
# of features
def general_predictor(input_list):
    """Predict the recommendation label for one set of answers.

    Args:
        input_list: answers in the same order as the columns of the
            module-level ``data`` frame. Pairing is done with ``zip``, so
            columns beyond the number of answers supplied are ignored.

    Returns:
        The entry of ``cat_value_dicts[final_colname]`` stored under the
        predicted class code.
    """
    # categorical answers are swapped for their numeric code;
    # everything else is passed through unchanged
    encoded = [
        cat_value_dicts[colname][answer] if colname in cat_value_dicts else answer
        for colname, answer in zip(data.columns, input_list)
    ]

    # predict() takes a batch, so wrap the single datapoint in a list
    prediction = model.predict([encoded])
    return cat_value_dicts[final_colname][prediction[0]]
def get_feat():
    """Return the name of the column whose coefficient has the largest magnitude.

    Looks only at the first row of ``model.coef_``; on a tie the earliest
    column wins.
    """
    magnitudes = [abs(weight) for weight in model.coef_[0]]
    # max() keeps the first of equal candidates, matching list.index(max(...))
    strongest_idx, _ = max(enumerate(magnitudes), key=lambda pair: pair[1])
    return data.columns[strongest_idx]
# the first line of info.md is the page title
with open('info.md') as f:
    st.title(f.readline())

st.subheader('Take the quiz to get a personalized recommendation using AI.')

form = st.form('ml-inputs')

# one input widget per feature column, labelled with the column name and
# collected in column order so the list can be fed to general_predictor
inputls = []
for colname in data.columns:
    # skip last column (it is the prediction target, not a question)
    if colname == final_colname:
        continue

    # access categories dict if data is categorical
    # otherwise, just use a number input
    if colname in cat_value_dicts:
        radio_options = list(cat_value_dicts[colname].keys())
        inputls.append(form.selectbox(colname, radio_options))
    else:
        # add numerical input
        # (fixed typo: the Streamlit method is number_input, not number_imput)
        inputls.append(form.number_input(colname))

# run the prediction once the form is submitted and show it inside the form
if form.form_submit_button("Submit to get your recommendation!"):
    prediction = general_predictor(inputls)
    form.subheader(prediction)

# NOTE(review): indentation was lost in this copy of the file; the summary
# section below is assumed to render regardless of submission -- confirm.
col1, col2 = st.columns(2)
col1.metric("Number of Different Possible Results", len(cat_value_dicts[final_colname]))
col2.metric("Model Accuracy", acc)

# the metric supplies only the heading; the column name is shown beneath it
st.metric("Most Important Question", "")
st.subheader(get_feat())

st.markdown("***")

# the remainder of info.md (everything after the title line) is body text
with open('info.md') as f:
    f.readline()
    st.markdown(f.read())