### ----------------------------- ###
###           libraries           ###
### ----------------------------- ###

import streamlit as st
import pickle as pkl
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
import os



### ----------------------------- ###
###        interface setup        ###
### ----------------------------- ###

with open('styles.css') as f:
  st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
  
st.title('Mental Health App')
st.subheader('Feeling like you might need a better coping strategy? Take the quiz to get a personalized recommendation using AI.')


### ------------------------------ ###
###       data transformation      ###
### ------------------------------ ###

def load_dataset():
  # load dataset
  uncleaned_data = pd.read_csv('data.csv')
  
  # remove timestamp from dataset (always first column)
  uncleaned_data = uncleaned_data.iloc[: , 1:]
  data = pd.DataFrame()
  
  # keep track of which columns are categorical and what 
  # those columns' value mappings are
  # structure: {colname1: {...}, colname2: {...} }
  cat_value_dicts = {}
  final_colname = uncleaned_data.columns[len(uncleaned_data.columns) - 1]
  
  # for each column...
  for (colname, colval) in uncleaned_data.items():
  
    # check if col is already a number; if so, add col directly
    # to new dataframe and skip to next column
    if isinstance(colval.values[0], (np.integer, float)):
      data[colname] = uncleaned_data[colname].copy()
      continue
  
    # structure: {"lilac": 0, "blue": 1, ...}
    new_dict = {}
    val = 0 # first index per column
    transformed_col_vals = [] # new numeric datapoints
  
    # if not, for each item in that column...
    for (row, item) in enumerate(colval.values):
      
      # if item is not in this col's dict...
      if item not in new_dict:
        new_dict[item] = val
        val += 1
      
      # then add numerical value to transformed dataframe
      transformed_col_vals.append(new_dict[item])
    
    # reverse dictionary only for final col (0, 1) => (vals)
    if colname == final_colname:
      new_dict = {value : key for (key, value) in new_dict.items()}
  
    cat_value_dicts[colname] = new_dict
    data[colname] = transformed_col_vals

  # hand the cleaned data and category mappings back to the caller
  return data, cat_value_dicts, final_colname
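
# example of the encoding above (hypothetical values): a column containing
# ["exercise", "journaling", "exercise"] becomes [0, 1, 0], and
# {"exercise": 0, "journaling": 1} is stored in cat_value_dicts; the mapping
# for the final column is reversed so a predicted index can be turned back
# into its original label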


### -------------------------------- ###
###           model training         ###
### -------------------------------- ###

def train_model(data):
  # select features and prediction; automatically selects last column as prediction
  cols = len(data.columns)
  num_features = cols - 1
  x = data.iloc[: , :num_features]
  y = data.iloc[: , num_features:]
  
  # split data into training and testing sets
  x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)
  
  # instantiate the model (using default parameters)
  model = LogisticRegression()
  model.fit(x_train, y_train.values.ravel())
  y_pred = model.predict(x_test)
  accuracy = metrics.accuracy_score(y_test, y_pred)
  
  # save the model and its test accuracy to file
  with open('model.pkl', 'wb') as f:
    pkl.dump((model, accuracy), f)
  
  return model, accuracy
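
# design note: the test accuracy is pickled alongside the model so the metrics
# shown in the interface still work when a saved model is loaded rather than
# retrained on a later run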

### -------------------------------- ###
###            rerun logic           ###
### -------------------------------- ###

# the cleaned data and category mappings are needed on every run;
# the model only has to be retrained if no saved copy exists yet
data, cat_value_dicts, final_colname = load_dataset()

if not os.path.exists('model.pkl'):
  model, accuracy = train_model(data)
else:
  with open('model.pkl', 'rb') as f:
    model, accuracy = pkl.load(f)


### ------------------------------- ###
###        interface creation       ###
### ------------------------------- ###


# predictor for generic number of features
def general_predictor(input_list):
  features = []

  # transform categorical input
  for colname, input in zip(data.columns, input_list):
    if (colname in cat_value_dicts):
      features.append(cat_value_dicts[colname][input])
    else:
      features.append(input)

  # predict single datapoint
  new_input = [features]
  result = model.predict(new_input)
  return cat_value_dicts[final_colname][result[0]]
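
# example (hypothetical answers): general_predictor(["Sometimes", "Often", 7])
# maps each categorical answer to its numeric code via cat_value_dicts, calls
# model.predict on that single row, and translates the predicted class index
# back into a recommendation string using cat_value_dicts[final_colname]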

def get_feat():
  # return the question whose coefficient has the largest absolute value
  feats = [abs(x) for x in model.coef_[0]]
  max_val = max(feats)
  idx = feats.index(max_val)
  return data.columns[idx]

form = st.form('ml-inputs')

# build one labeled input widget per feature column
inputls = []
for colname in data.columns:
  # skip last column
  if colname == final_colname:
    continue

  # access categories dict if data is categorical
  # otherwise, just use a number input
  if colname in cat_value_dicts:
    options = list(cat_value_dicts[colname].keys())
    inputls.append(form.selectbox(colname, options))
  else:
    # add numerical input
    inputls.append(form.number_input(colname))

# run the prediction and display it once the form is submitted
if form.form_submit_button("Submit to get your recommendation!"):
  prediction = general_predictor(inputls)
  
  form.subheader(prediction)
  
col1, col2 = st.columns(2)
col1.metric("Number of Options", len(cat_value_dicts[final_colname]))
col2.metric("Model Accuracy", str(round(accuracy * 100, 1)) + '%')
st.metric("Most Important Question", get_feat())

  
with open('info.md') as f:
  st.markdown(f.read())