aiEDUcurriculum committed on
Commit
fcd9be8
·
verified ·
1 Parent(s): 1597643

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +120 -115
app.py CHANGED
@@ -3,7 +3,6 @@
3
  ### ----------------------------- ###
4
 
5
  import gradio as gr
6
-
7
  import pandas as pd
8
  import numpy as np
9
  from sklearn.model_selection import train_test_split
@@ -16,158 +15,164 @@ from sklearn import metrics
16
  ### ------------------------------ ###
17
 
18
  # load dataset
19
- uncleaned_data = pd.read_csv('data.csv')
 
 
 
20
 
21
- # remove timestamp from dataset (always first column)
22
- uncleaned_data = uncleaned_data.iloc[: , 1:]
23
- data = pd.DataFrame()
24
 
25
  # keep track of which columns are categorical and what
26
  # those columns' value mappings are
27
- # structure: {colname1: {...}, colname2: {...} }
28
  cat_value_dicts = {}
29
- final_colname = uncleaned_data.columns[len(uncleaned_data.columns) - 1]
30
 
31
  # for each column...
32
- for (colname, colval) in uncleaned_data.iteritems():
33
-
34
- # check if col is already a number; if so, add col directly
35
- # to new dataframe and skip to next column
36
- if isinstance(colval.values[0], (np.integer, float)):
37
- data[colname] = uncleaned_data[colname].copy()
38
- continue
39
-
40
- # structure: {0: "lilac", 1: "blue", ...}
41
- new_dict = {}
42
- val = 0 # first index per column
43
- transformed_col_vals = [] # new numeric datapoints
44
-
45
- # if not, for each item in that column...
46
- for (row, item) in enumerate(colval.values):
47
 
48
- # if item is not in this col's dict...
49
- if item not in new_dict:
50
- new_dict[item] = val
51
- val += 1
52
 
53
- # then add numerical value to transformed dataframe
54
- transformed_col_vals.append(new_dict[item])
55
-
56
- # reverse dictionary only for final col (0, 1) => (vals)
57
- if colname == final_colname:
58
- new_dict = {value : key for (key, value) in new_dict.items()}
59
-
60
- cat_value_dicts[colname] = new_dict
61
- data[colname] = transformed_col_vals
62
 
63
 
64
  ### -------------------------------- ###
65
  ### model training ###
66
  ### -------------------------------- ###
67
 
68
- # select features and predicton; automatically selects last column as prediction
69
- cols = len(data.columns)
70
- num_features = cols - 1
71
- x = data.iloc[: , :num_features]
72
- y = data.iloc[: , num_features:]
73
 
74
  # split data into training and testing sets
75
- x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)
76
 
77
- # instantiate the model (using default parameters)
78
- model = LogisticRegression()
79
- model.fit(x_train, y_train.values.ravel())
80
- y_pred = model.predict(x_test)
81
 
82
 
83
  ### -------------------------------- ###
84
- ### article generation ###
85
  ### -------------------------------- ###
86
- # borrow file reading function from reader.py
87
 
88
  def get_feat():
89
- feats = [abs(x) for x in model.coef_[0]]
90
- max_val = max(feats)
91
- idx = feats.index(max_val)
92
- return data.columns[idx]
93
-
94
  acc = str(round(metrics.accuracy_score(y_test, y_pred) * 100, 1)) + "%"
95
  most_imp_feat = get_feat()
96
- # info = get_article(acc, most_imp_feat)
97
-
98
 
99
 
100
  ### ------------------------------- ###
101
- ### interface creation ###
102
  ### ------------------------------- ###
103
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
- # predictor for generic number of features
106
- def general_predictor(*args):
107
- features = []
108
-
109
- # transform categorical input
110
- for colname, arg in zip(data.columns, args):
111
- if (colname in cat_value_dicts):
112
- features.append(cat_value_dicts[colname][arg])
113
- else:
114
- features.append(arg)
115
-
116
- # predict single datapoint
117
- new_input = [features]
118
- result = model.predict(new_input)
119
- return cat_value_dicts[final_colname][result[0]]
120
-
121
- # add data labels to replace those lost via star-args
122
 
 
 
 
123
 
124
  block = gr.Blocks()
125
 
126
- with open('info.md') as f:
127
- with block:
128
- gr.Markdown(f.readline())
129
- gr.Markdown('Take the quiz to get a personalized recommendation using AI.')
130
 
131
  with gr.Row():
132
- with gr.Box():
133
- inputls = []
134
- for colname in data.columns:
135
- # skip last column
136
- if colname == final_colname:
137
- continue
138
-
139
- # access categories dict if data is categorical
140
- # otherwise, just use a number input
141
- if colname in cat_value_dicts:
142
- radio_options = list(cat_value_dicts[colname].keys())
143
- inputls.append(gr.Dropdown(choices=radio_options, type="value", label=colname))
144
- else:
145
- # add numerical input
146
- inputls.append(gr.Number(label=colname))
147
- gr.Markdown("<br />")
148
-
149
- submit = gr.Button("Click to see your personalized result!", variant="primary")
150
- gr.Markdown("<br />")
151
- output = gr.Textbox(label="Your recommendation:", placeholder="your recommendation will appear here")
152
-
153
- submit.click(fn=general_predictor, inputs=inputls, outputs=output)
154
- gr.Markdown("<br />")
155
-
156
- with gr.Row():
157
- with gr.Box():
158
- gr.Markdown(f"<h3>Accuracy: </h3>{acc}")
159
- with gr.Box():
160
- gr.Markdown(f"<h3>Most important feature: </h3>{most_imp_feat}")
161
-
162
- gr.Markdown("<br />")
163
-
164
- with gr.Box():
165
- gr.Markdown('''⭐ Note that model accuracy is based on the uploaded data.csv and reflects how well the AI model can give correct recommendations for <em>that dataset</em>. Model accuracy and most important feature can be helpful for understanding how the model works, but <em>should not be considered absolute facts about the real world</em>.''')
 
 
 
 
 
166
 
167
- with gr.Box():
168
- with open('info.md') as f:
169
- f.readline()
170
- gr.Markdown(f.read())
 
 
 
 
 
 
 
 
 
 
 
171
 
172
- # show the interface
173
  block.launch()
 
3
  ### ----------------------------- ###
4
 
5
  import gradio as gr
 
6
  import pandas as pd
7
  import numpy as np
8
  from sklearn.model_selection import train_test_split
 
15
  ### ------------------------------ ###
16
 
17
  # load dataset
18
+ data = pd.read_csv('data.csv')
19
+
20
+ # remove timestamp from dataset (first column)
21
+ data = data.iloc[:, 1:]
22
 
23
+ # create a copy for transformed data
24
+ transformed_data = pd.DataFrame()
 
25
 
26
  # keep track of which columns are categorical and what
27
  # those columns' value mappings are
 
28
  cat_value_dicts = {}
29
+ final_colname = data.columns[-1] # club recommendation
30
 
31
  # for each column...
32
+ for colname in data.columns:
33
+ if pd.api.types.is_numeric_dtype(data[colname]):
34
+ transformed_data[colname] = data[colname].copy()
35
+ continue
36
+
37
+ # Create mapping for categorical variables
38
+ unique_vals = data[colname].unique()
39
+ val_dict = {val: idx for idx, val in enumerate(sorted(unique_vals))}
 
 
 
 
 
 
 
40
 
41
+ # If it's the target column, store the reverse mapping
42
+ if colname == final_colname:
43
+ val_dict = {idx: val for val, idx in val_dict.items()}
 
44
 
45
+ cat_value_dicts[colname] = val_dict
46
+ transformed_data[colname] = data[colname].map(val_dict)
 
 
 
 
 
 
 
47
 
48
 
49
  ### -------------------------------- ###
50
  ### model training ###
51
  ### -------------------------------- ###
52
 
53
+ # select features and prediction
54
+ X = transformed_data.iloc[:, :-1] # all columns except last
55
+ y = transformed_data.iloc[:, -1] # last column
 
 
56
 
57
  # split data into training and testing sets
58
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
59
 
60
+ # train the model
61
+ model = LogisticRegression(max_iter=1000)
62
+ model.fit(X_train, y_train)
63
+ y_pred = model.predict(X_test)
64
 
65
 
66
  ### -------------------------------- ###
67
+ ### model evaluation ###
68
  ### -------------------------------- ###
 
69
 
70
  def get_feat():
71
+ feats = [abs(x) for x in model.coef_[0]]
72
+ max_val = max(feats)
73
+ idx = feats.index(max_val)
74
+ return data.columns[idx]
75
+
76
  acc = str(round(metrics.accuracy_score(y_test, y_pred) * 100, 1)) + "%"
77
  most_imp_feat = get_feat()
 
 
78
 
79
 
80
  ### ------------------------------- ###
81
+ ### predictor function ###
82
  ### ------------------------------- ###
83
 
84
def predict(*args):
    """Turn one set of questionnaire answers into a recommendation string.

    Args:
        *args: One answer per feature column, paired positionally with
            ``data.columns[:-1]``. Answers for columns present in
            ``cat_value_dicts`` are looked up in that column's mapping;
            all other answers are converted with ``float``.

    Returns:
        The entry of ``cat_value_dicts[final_colname]`` keyed by the
        model's predicted value, or a human-readable message when an
        answer is missing or invalid, or when prediction itself fails.
    """
    features = []

    # transform categorical input using our mappings
    for colname, arg in zip(data.columns[:-1], args):
        if arg is None:
            return "Please fill in all fields"

        if colname in cat_value_dicts:
            if arg not in cat_value_dicts[colname]:
                return f"Invalid value for {colname}"
            features.append(cat_value_dicts[colname][arg])
        else:
            try:
                features.append(float(arg))
            except (TypeError, ValueError, OverflowError):
                # Only conversion failures mean "bad numeric input"; a bare
                # except would also trap KeyboardInterrupt/SystemExit.
                return f"Invalid numeric value for {colname}"

    # predict using the model; this is the UI boundary, so any failure is
    # reported to the user as text instead of propagating
    try:
        result = model.predict([features])
        return cat_value_dicts[final_colname][result[0]]
    except Exception as e:
        return f"Error making prediction: {e}"
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
 
111
+ ### ------------------------------- ###
112
+ ### interface creation ###
113
+ ### ------------------------------- ###
114
 
115
  block = gr.Blocks()
116
 
117
+ with block:
118
+ gr.Markdown("# Club Recommendation System")
119
+ gr.Markdown("Take the quiz to get a personalized club recommendation using AI.")
 
120
 
121
  with gr.Row():
122
+ with gr.Column(variant="panel"): # Changed from Box to Column with panel variant
123
+ inputls = []
124
+
125
+ # Create input components for each feature
126
+ for colname in data.columns[:-1]: # Exclude the target column
127
+ if colname in cat_value_dicts:
128
+ choices = list(cat_value_dicts[colname].keys())
129
+ inputls.append(gr.Dropdown(
130
+ choices=choices,
131
+ label=colname,
132
+ type="value"
133
+ ))
134
+ else:
135
+ inputls.append(gr.Number(label=colname))
136
+ gr.Markdown("<br />")
137
+
138
+ submit = gr.Button("Click to see your personalized result!", variant="primary")
139
+ gr.Markdown("<br />")
140
+ output = gr.Textbox(
141
+ label="Your recommendation:",
142
+ placeholder="your recommendation will appear here"
143
+ )
144
+
145
+ submit.click(fn=predict, inputs=inputls, outputs=output)
146
+ gr.Markdown("<br />")
147
+
148
+ with gr.Row():
149
+ with gr.Column(variant="panel"): # Changed from Box to Column
150
+ gr.Markdown(f"### Model Accuracy\n{acc}")
151
+ with gr.Column(variant="panel"): # Changed from Box to Column
152
+ gr.Markdown(f"### Most Important Feature\n{most_imp_feat}")
153
+
154
+ gr.Markdown("<br />")
155
+
156
+ with gr.Column(variant="panel"): # Changed from Box to Column
157
+ gr.Markdown('''⭐ Note that model accuracy is based on the uploaded data.csv and reflects how well
158
+ the AI model can give correct recommendations for <em>that dataset</em>. Model accuracy
159
+ and most important feature can be helpful for understanding how the model works, but
160
+ <em>should not be considered absolute facts about the real world</em>.''')
161
 
162
+ with gr.Column(variant="panel"): # Changed from Box to Column
163
+ gr.Markdown("""
164
+ # About the Club Recommendation System
165
+
166
+ This system uses machine learning to suggest clubs based on your preferences and personality.
167
+ Fill out the questionnaire on the left to get your personalized recommendation.
168
+
169
+ The system takes into account factors like:
170
+ - Your social preferences
171
+ - Activity preferences
172
+ - Personal strengths
173
+ - Time commitment
174
+
175
+ Remember that this is just a suggestion - you should always choose the club that interests you most!
176
+ """)
177
 
 
178
  block.launch()