Isabel Gwara committed on
Commit
c1478a9
·
1 Parent(s): 5f22042

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -36
app.py CHANGED
@@ -28,58 +28,58 @@ st.subheader('Feeling like you might need a better coping strategy? Take the qui
28
  ### data transformation ###
29
  ### ------------------------------ ###
30
 
31
- def train_model():
32
- # load dataset
33
- uncleaned_data = pd.read_csv('data.csv')
34
 
35
- # remove timestamp from dataset (always first column)
36
- uncleaned_data = uncleaned_data.iloc[: , 1:]
37
- data = pd.DataFrame()
38
 
39
- # keep track of which columns are categorical and what
40
- # those columns' value mappings are
41
- # structure: {colname1: {...}, colname2: {...} }
42
- cat_value_dicts = {}
43
- final_colname = uncleaned_data.columns[len(uncleaned_data.columns) - 1]
44
 
45
- # for each column...
46
- for (colname, colval) in uncleaned_data.iteritems():
47
 
48
- # check if col is already a number; if so, add col directly
49
- # to new dataframe and skip to next column
50
- if isinstance(colval.values[0], (np.integer, float)):
51
- data[colname] = uncleaned_data[colname].copy()
52
- continue
53
 
54
- # structure: {0: "lilac", 1: "blue", ...}
55
- new_dict = {}
56
- val = 0 # first index per column
57
- transformed_col_vals = [] # new numeric datapoints
58
 
59
- # if not, for each item in that column...
60
- for (row, item) in enumerate(colval.values):
61
 
62
- # if item is not in this col's dict...
63
- if item not in new_dict:
64
- new_dict[item] = val
65
- val += 1
66
 
67
- # then add numerical value to transformed dataframe
68
- transformed_col_vals.append(new_dict[item])
69
 
70
- # reverse dictionary only for final col (0, 1) => (vals)
71
- if colname == final_colname:
72
- new_dict = {value : key for (key, value) in new_dict.items()}
73
 
74
- cat_value_dicts[colname] = new_dict
75
- data[colname] = transformed_col_vals
76
 
77
 
78
  ### -------------------------------- ###
79
  ### model training ###
80
  ### -------------------------------- ###
81
 
82
-
83
  # select features and predicton; automatically selects last column as prediction
84
  cols = len(data.columns)
85
  num_features = cols - 1
 
28
  ### data transformation ###
29
  ### ------------------------------ ###
30
 
31
+
32
+ # load dataset
33
+ uncleaned_data = pd.read_csv('data.csv')
34
 
35
+ # remove timestamp from dataset (always first column)
36
+ uncleaned_data = uncleaned_data.iloc[: , 1:]
37
+ data = pd.DataFrame()
38
 
39
+ # keep track of which columns are categorical and what
40
+ # those columns' value mappings are
41
+ # structure: {colname1: {...}, colname2: {...} }
42
+ cat_value_dicts = {}
43
+ final_colname = uncleaned_data.columns[len(uncleaned_data.columns) - 1]
44
 
45
+ # for each column...
46
+ for (colname, colval) in uncleaned_data.iteritems():
47
 
48
+ # check if col is already a number; if so, add col directly
49
+ # to new dataframe and skip to next column
50
+ if isinstance(colval.values[0], (np.integer, float)):
51
+ data[colname] = uncleaned_data[colname].copy()
52
+ continue
53
 
54
+ # structure: {0: "lilac", 1: "blue", ...}
55
+ new_dict = {}
56
+ val = 0 # first index per column
57
+ transformed_col_vals = [] # new numeric datapoints
58
 
59
+ # if not, for each item in that column...
60
+ for (row, item) in enumerate(colval.values):
61
 
62
+ # if item is not in this col's dict...
63
+ if item not in new_dict:
64
+ new_dict[item] = val
65
+ val += 1
66
 
67
+ # then add numerical value to transformed dataframe
68
+ transformed_col_vals.append(new_dict[item])
69
 
70
+ # reverse dictionary only for final col (0, 1) => (vals)
71
+ if colname == final_colname:
72
+ new_dict = {value : key for (key, value) in new_dict.items()}
73
 
74
+ cat_value_dicts[colname] = new_dict
75
+ data[colname] = transformed_col_vals
76
 
77
 
78
  ### -------------------------------- ###
79
  ### model training ###
80
  ### -------------------------------- ###
81
 
82
+ def train_model():
83
  # select features and predicton; automatically selects last column as prediction
84
  cols = len(data.columns)
85
  num_features = cols - 1