tedgwara committed on
Commit
6832fea
1 Parent(s): bfd08dd

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +136 -0
app.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ from sklearn.model_selection import train_test_split
4
+ from sklearn.linear_model import LogisticRegression
5
+ from sklearn import metrics
6
+ from reader import get_article
7
+
8
+
9
+ ### ------------------------------ ###
10
+ ### data transformation ###
11
+ ### ------------------------------ ###
12
# Answer choices, one row per dataset column (same order as the CSV columns).
# A value's position inside its row is the numeric code used for training,
# so the order of the strings within each row matters.
options = [
    ['Very Poorly Aligned', 'Poorly Aligned', 'Neutrally Aligned', 'Well Aligned', 'Very Well Aligned'],
    ['Very Limited Experience', 'Limited Experience', 'Neutral Experience', 'Extensive Experience', 'Very Extensive Experience'],
    ['Extremely Unattractive', 'Moderately Unattractive', 'Neutrally Attractive', 'Moderately Attractive', 'Extremely Attractive'],
    ['Very Unfavorable', 'Moderately Unfavorable', 'Neutrally Favorable', 'Moderately Favorable', 'Very Favorable'],
    ['Very Poor Fit', 'Poor Fit', 'Neutral Fit', 'Moderately Good Fit', 'Excellent Fit']
]

# load dataset
uncleaned_data = pd.read_csv('data.csv')
data = pd.DataFrame()

# Per-column value mappings.
# structure: {colname1: {...}, colname2: {...}}
#   - feature columns map  label text -> numeric code
#   - the final (target) column maps numeric code -> label text, so that a
#     model prediction can be turned back into a readable answer
cat_value_dicts = {}
final_colname = uncleaned_data.columns[4]

# Encode every column. `DataFrame.items()` is used instead of `iteritems()`,
# which was removed in pandas 2.0.
for col, (colname, colval) in enumerate(uncleaned_data.items()):

    # structure: {"Well Aligned": 3, "Poorly Aligned": 1, ...}
    new_dict = {}
    transformed_col_vals = []  # numeric datapoints for this column

    for item in colval.values:

        # first time this label is seen: look up its code in the options row
        # (raises ValueError if the CSV holds a label missing from `options`)
        if item not in new_dict:
            new_dict[item] = options[col].index(item)

        transformed_col_vals.append(new_dict[item])

    # reverse dictionary only for the final column: (code) => (label)
    if colname == final_colname:
        new_dict = {value: key for (key, value) in new_dict.items()}

    cat_value_dicts[colname] = new_dict
    data[colname] = transformed_col_vals
56
+
57
+
58
+ ### -------------------------------- ###
59
+ ### model training ###
60
+ ### -------------------------------- ###
61
+
62
# select features and prediction target: the first `num_features` columns are
# the model inputs, everything after them is the label
num_features = 4
x, y = data.iloc[:, :num_features], data.iloc[:, num_features:]

# hold out a quarter of the rows for measuring accuracy
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)

# fit a logistic regression on the training rows (the label frame is flattened
# to 1-D for fit), then predict the held-out rows
model = LogisticRegression(max_iter=100).fit(x_train, y_train.values.ravel())
y_pred = model.predict(x_test)
74
+
75
+
76
+ ### -------------------------------- ###
77
+ ### article generation ###
78
+ ### -------------------------------- ###
79
+ # borrow file reading function from reader.py
80
+
81
def get_feats(coefs=None, columns=None):
    """Return column names ordered from most to least influential.

    Influence is the absolute value of each coefficient; ties are broken by
    column name in descending order.

    Args:
        coefs: iterable of coefficients, one per feature. Defaults to
            ``model.coef_[0]`` of the fitted module-level model.
        columns: iterable of column names aligned with ``coefs``. Defaults to
            ``data.columns``. Names beyond the number of coefficients (for
            example a trailing label column) are ignored.

    Returns:
        A list of column names, strongest coefficient first. Empty input
        yields an empty list.
    """
    # NOTE(review): for a multiclass target, ``coef_`` holds one row per class
    # and row 0 only describes the first class -- confirm this is intended.
    if coefs is None:
        coefs = model.coef_[0]
    if columns is None:
        columns = data.columns

    # zip() stops at the shorter input, which drops any extra column names
    ranked = sorted(zip((abs(c) for c in coefs), columns), reverse=True)
    return [name for _, name in ranked]
94
+
95
# held-out accuracy as a percentage string, e.g. "87.5%"
acc = f"{round(metrics.accuracy_score(y_test, y_pred) * 100, 2)}%"

# feature ranking plus accuracy feed the article text shown under the demo
feats = get_feats()
info = get_article(acc, feats)
98
+
99
+
100
+
101
+ ### ------------------------------- ###
102
+ ### interface creation ###
103
+ ### ------------------------------- ###
104
+
105
def predictor(*args):
    """Predict the label text for one set of radio-button answers.

    Each positional argument is the selected answer for the dataset column at
    the same position. Answers are converted to their numeric codes through
    ``cat_value_dicts``, passed to the model as a single row, and the
    predicted code is mapped back to its label text.
    """
    # transform categorical input into the numeric codes used for training
    encoded = [
        cat_value_dicts[data.columns[position]][answer]
        for position, answer in enumerate(args)
    ]

    # predict a single datapoint (the model expects a 2-D, one-row input)
    prediction = model.predict([encoded])
    return cat_value_dicts[final_colname][prediction[0]]
116
+
117
# question text for each input; star-args in `predictor` carry no names, so
# the labels are supplied here
labels = [
    "How Well Do They Align with RS21's 9 Core Values?",
    "How Experienced Are They in RS21's Markets?",
    "How Attractive is Their Valuation of RS21?",
    "How Favorable is Their Proposed Deal Structure for RS21?"
]

# one radio group per question, paired with the matching row of `options`
# (zip stops after the last label, so the target column's options are unused)
inputls = [
    gr.inputs.Radio(choices=choices, type="value", label=question)
    for choices, question in zip(options, labels)
]

# generate gradio interface
interface = gr.Interface(
    predictor,
    inputs=inputls,
    outputs="text",
    article=info['article'],
    css=info['css'],
    theme="grass",
    title=info['title'],
    allow_flagging='never',
    description=info['description'],
)

# show the interface
interface.launch()