Update app.py
Browse files
app.py
CHANGED
@@ -3,7 +3,6 @@
|
|
3 |
### ----------------------------- ###
|
4 |
|
5 |
import gradio as gr
|
6 |
-
|
7 |
import pandas as pd
|
8 |
import numpy as np
|
9 |
from sklearn.model_selection import train_test_split
|
@@ -16,158 +15,164 @@ from sklearn import metrics
|
|
16 |
### ------------------------------ ###
|
17 |
|
18 |
# load dataset
|
19 |
-
|
|
|
|
|
|
|
20 |
|
21 |
-
#
|
22 |
-
|
23 |
-
data = pd.DataFrame()
|
24 |
|
25 |
# keep track of which columns are categorical and what
|
26 |
# those columns' value mappings are
|
27 |
-
# structure: {colname1: {...}, colname2: {...} }
|
28 |
cat_value_dicts = {}
|
29 |
-
final_colname =
|
30 |
|
31 |
# for each column...
|
32 |
-
for
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
# structure: {0: "lilac", 1: "blue", ...}
|
41 |
-
new_dict = {}
|
42 |
-
val = 0 # first index per column
|
43 |
-
transformed_col_vals = [] # new numeric datapoints
|
44 |
-
|
45 |
-
# if not, for each item in that column...
|
46 |
-
for (row, item) in enumerate(colval.values):
|
47 |
|
48 |
-
#
|
49 |
-
if
|
50 |
-
|
51 |
-
val += 1
|
52 |
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
# reverse dictionary only for final col (0, 1) => (vals)
|
57 |
-
if colname == final_colname:
|
58 |
-
new_dict = {value : key for (key, value) in new_dict.items()}
|
59 |
-
|
60 |
-
cat_value_dicts[colname] = new_dict
|
61 |
-
data[colname] = transformed_col_vals
|
62 |
|
63 |
|
64 |
### -------------------------------- ###
|
65 |
### model training ###
|
66 |
### -------------------------------- ###
|
67 |
|
68 |
-
# select features and
|
69 |
-
|
70 |
-
|
71 |
-
x = data.iloc[: , :num_features]
|
72 |
-
y = data.iloc[: , num_features:]
|
73 |
|
74 |
# split data into training and testing sets
|
75 |
-
|
76 |
|
77 |
-
#
|
78 |
-
model = LogisticRegression()
|
79 |
-
model.fit(
|
80 |
-
y_pred = model.predict(
|
81 |
|
82 |
|
83 |
### -------------------------------- ###
|
84 |
-
###
|
85 |
### -------------------------------- ###
|
86 |
-
# borrow file reading function from reader.py
|
87 |
|
88 |
def get_feat():
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
acc = str(round(metrics.accuracy_score(y_test, y_pred) * 100, 1)) + "%"
|
95 |
most_imp_feat = get_feat()
|
96 |
-
# info = get_article(acc, most_imp_feat)
|
97 |
-
|
98 |
|
99 |
|
100 |
### ------------------------------- ###
|
101 |
-
###
|
102 |
### ------------------------------- ###
|
103 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
|
105 |
-
# predictor for generic number of features
|
106 |
-
def general_predictor(*args):
|
107 |
-
features = []
|
108 |
-
|
109 |
-
# transform categorical input
|
110 |
-
for colname, arg in zip(data.columns, args):
|
111 |
-
if (colname in cat_value_dicts):
|
112 |
-
features.append(cat_value_dicts[colname][arg])
|
113 |
-
else:
|
114 |
-
features.append(arg)
|
115 |
-
|
116 |
-
# predict single datapoint
|
117 |
-
new_input = [features]
|
118 |
-
result = model.predict(new_input)
|
119 |
-
return cat_value_dicts[final_colname][result[0]]
|
120 |
-
|
121 |
-
# add data labels to replace those lost via star-args
|
122 |
|
|
|
|
|
|
|
123 |
|
124 |
block = gr.Blocks()
|
125 |
|
126 |
-
with
|
127 |
-
|
128 |
-
gr.Markdown(
|
129 |
-
gr.Markdown('Take the quiz to get a personalized recommendation using AI.')
|
130 |
|
131 |
with gr.Row():
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
gr.
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
|
|
|
|
|
|
|
|
|
|
166 |
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
|
172 |
-
# show the interface
|
173 |
block.launch()
|
|
|
3 |
### ----------------------------- ###
|
4 |
|
5 |
import gradio as gr
|
|
|
6 |
import pandas as pd
|
7 |
import numpy as np
|
8 |
from sklearn.model_selection import train_test_split
|
|
|
15 |
### ------------------------------ ###
|
16 |
|
17 |
# load dataset
|
18 |
+
# Read the quiz responses from data.csv.
data = pd.read_csv('data.csv')

# remove timestamp from dataset (first column)
data = data.iloc[:, 1:]

# numeric copy of `data`; this is what the model is trained on
transformed_data = pd.DataFrame()

# keep track of which columns are categorical and what
# those columns' value mappings are
# structure: {colname1: {...}, colname2: {...} }
#   - feature columns map  label -> integer code  (used to encode user input)
#   - the target column maps integer code -> label (used to decode predictions)
cat_value_dicts = {}
final_colname = data.columns[-1]  # club recommendation

# for each column...
for colname in data.columns:
    # numeric columns are copied through unchanged and get no mapping entry
    if pd.api.types.is_numeric_dtype(data[colname]):
        transformed_data[colname] = data[colname].copy()
        continue

    # Create mapping for categorical variables; sorting the distinct values
    # makes the label -> code assignment independent of row order.
    label_to_code = {
        label: code
        for code, label in enumerate(sorted(data[colname].unique()))
    }

    # Always encode the column with the forward (label -> code) mapping.
    # Mapping through the reversed dict would turn every value into NaN,
    # because Series.map yields NaN for values that are not dict keys.
    transformed_data[colname] = data[colname].map(label_to_code)

    # If it's the target column, store the reverse mapping so predicted
    # codes can be turned back into labels.
    if colname == final_colname:
        cat_value_dicts[colname] = {
            code: label for label, code in label_to_code.items()
        }
    else:
        cat_value_dicts[colname] = label_to_code
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
|
48 |
|
49 |
### -------------------------------- ###
|
50 |
### model training ###
|
51 |
### -------------------------------- ###
|
52 |
|
53 |
+
# select features and prediction
|
54 |
+
# Features are every column except the last; the last column is the target.
X, y = transformed_data.iloc[:, :-1], transformed_data.iloc[:, -1]

# split data into training and testing sets (25% held out, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

# train the model, then predict the held-out rows for evaluation
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
y_pred = model.predict(X_test)
|
64 |
|
65 |
|
66 |
### -------------------------------- ###
|
67 |
+
### model evaluation ###
|
68 |
### -------------------------------- ###
|
|
|
69 |
|
70 |
def get_feat():
    """Return the name of the feature the trained model weights most heavily.

    Reads the module-level ``model`` (its ``coef_`` array) and ``data`` (its
    column labels; position ``i`` in ``coef_`` is looked up as
    ``data.columns[i]``).

    ``coef_`` holds one row of coefficients per class for a multiclass
    target and a single row for a binary one, so the absolute coefficients
    are averaged over all rows rather than taken from the first row only.
    For a single-row ``coef_`` this picks the same feature as before.

    Returns:
        The label in ``data.columns`` at the position of the largest mean
        absolute coefficient (the first such position on ties).
    """
    importance = np.abs(model.coef_).mean(axis=0)
    return data.columns[int(np.argmax(importance))]
|
75 |
+
|
76 |
# Accuracy on the held-out split, as a percentage rounded to one decimal
# place with a trailing "%" (shown in the interface).
acc = "%s%%" % round(metrics.accuracy_score(y_test, y_pred) * 100, 1)

# Name of the feature with the largest coefficient magnitude.
most_imp_feat = get_feat()
|
|
|
|
|
78 |
|
79 |
|
80 |
### ------------------------------- ###
|
81 |
+
### predictor function ###
|
82 |
### ------------------------------- ###
|
83 |
|
84 |
+
def predict(*args):
    """Return the recommendation for one set of quiz answers.

    Reads the module-level ``data``, ``cat_value_dicts``, ``final_colname``
    and ``model``.

    Args:
        *args: One answer per feature column, in the order of
            ``data.columns[:-1]``. Answers for columns listed in
            ``cat_value_dicts`` must be one of that column's known values;
            all other answers must be convertible to ``float``.

    Returns:
        The predicted label decoded through ``cat_value_dicts[final_colname]``,
        or a human-readable message string when an answer is missing or
        invalid, or when the model call fails.
    """
    feature_names = data.columns[:-1]

    # zip() would silently drop surplus/missing answers; reject them up front.
    if len(args) != len(feature_names):
        return "Please fill in all fields"

    features = []

    # transform categorical input using our mappings
    for colname, arg in zip(feature_names, args):
        if arg is None:
            return "Please fill in all fields"

        if colname in cat_value_dicts:
            if arg not in cat_value_dicts[colname]:
                return f"Invalid value for {colname}"
            features.append(cat_value_dicts[colname][arg])
        else:
            try:
                features.append(float(arg))
            except (TypeError, ValueError, OverflowError):
                # only conversion failures mean "bad number"; anything else
                # should not be masked here
                return f"Invalid numeric value for {colname}"

    # predict using the model
    try:
        # Pass a one-row DataFrame carrying the feature column names rather
        # than a bare list, so the input has the same labelled layout as the
        # columns of `data`.
        new_input = pd.DataFrame([features], columns=feature_names)
        result = model.predict(new_input)
        return cat_value_dicts[final_colname][result[0]]
    except Exception as e:
        # UI boundary: report the failure as text instead of raising
        return f"Error making prediction: {str(e)}"
|
109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
|
111 |
+
### ------------------------------- ###
|
112 |
+
### interface creation ###
|
113 |
+
### ------------------------------- ###
|
114 |
|
115 |
# NOTE(review): indentation was lost in the copy this was rebuilt from, so the
# nesting of the `with` blocks below is reconstructed from statement order and
# the UI text ("questionnaire on the left") — confirm against the real app.py.
block = gr.Blocks()

with block:
    gr.Markdown("# Club Recommendation System")
    gr.Markdown("Take the quiz to get a personalized club recommendation using AI.")

    with gr.Row():
        # Left panel: the questionnaire, the result box and the model stats.
        with gr.Column(variant="panel"):
            inputls = []

            # One input per feature column (the target column is excluded):
            # a dropdown of known values for categorical columns, a number
            # field otherwise.
            for colname in data.columns[:-1]:
                if colname in cat_value_dicts:
                    field = gr.Dropdown(
                        choices=list(cat_value_dicts[colname]),
                        label=colname,
                        type="value",
                    )
                else:
                    field = gr.Number(label=colname)
                inputls.append(field)
                gr.Markdown("<br />")

            submit = gr.Button("Click to see your personalized result!", variant="primary")
            gr.Markdown("<br />")
            output = gr.Textbox(
                label="Your recommendation:",
                placeholder="your recommendation will appear here",
            )

            # Clicking the button sends every input, in column order, to predict().
            submit.click(fn=predict, inputs=inputls, outputs=output)
            gr.Markdown("<br />")

            with gr.Row():
                with gr.Column(variant="panel"):
                    gr.Markdown(f"### Model Accuracy\n{acc}")
                with gr.Column(variant="panel"):
                    gr.Markdown(f"### Most Important Feature\n{most_imp_feat}")

            gr.Markdown("<br />")

            with gr.Column(variant="panel"):
                gr.Markdown('''⭐ Note that model accuracy is based on the uploaded data.csv and reflects how well
the AI model can give correct recommendations for <em>that dataset</em>. Model accuracy
and most important feature can be helpful for understanding how the model works, but
<em>should not be considered absolute facts about the real world</em>.''')

        # Right panel: static description of the app.
        with gr.Column(variant="panel"):
            gr.Markdown("""
# About the Club Recommendation System

This system uses machine learning to suggest clubs based on your preferences and personality.
Fill out the questionnaire on the left to get your personalized recommendation.

The system takes into account factors like:
- Your social preferences
- Activity preferences
- Personal strengths
- Time commitment

Remember that this is just a suggestion - you should always choose the club that interests you most!
""")

# start the app
block.launch()
|