aiEDUcurriculum committed on
Commit
7273ceb
·
verified ·
1 Parent(s): fcd9be8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -87
app.py CHANGED
@@ -1,7 +1,3 @@
1
- ### ----------------------------- ###
2
- ### libraries ###
3
- ### ----------------------------- ###
4
-
5
  import gradio as gr
6
  import pandas as pd
7
  import numpy as np
@@ -9,121 +5,111 @@ from sklearn.model_selection import train_test_split
9
  from sklearn.linear_model import LogisticRegression
10
  from sklearn import metrics
11
 
12
-
13
- ### ------------------------------ ###
14
- ### data transformation ###
15
- ### ------------------------------ ###
16
-
17
- # load dataset
18
  data = pd.read_csv('data.csv')
 
19
 
20
- # remove timestamp from dataset (first column)
21
  data = data.iloc[:, 1:]
 
 
22
 
23
- # create a copy for transformed data
24
  transformed_data = pd.DataFrame()
25
-
26
- # keep track of which columns are categorical and what
27
- # those columns' value mappings are
28
  cat_value_dicts = {}
29
- final_colname = data.columns[-1] # club recommendation
30
 
31
- # for each column...
32
  for colname in data.columns:
 
 
 
33
  if pd.api.types.is_numeric_dtype(data[colname]):
34
- transformed_data[colname] = data[colname].copy()
 
35
  continue
36
-
37
- # Create mapping for categorical variables
38
- unique_vals = data[colname].unique()
39
- val_dict = {val: idx for idx, val in enumerate(sorted(unique_vals))}
40
 
41
- # If it's the target column, store the reverse mapping
 
 
 
42
  if colname == final_colname:
43
- val_dict = {idx: val for val, idx in val_dict.items()}
 
 
 
 
 
 
44
 
45
- cat_value_dicts[colname] = val_dict
46
  transformed_data[colname] = data[colname].map(val_dict)
 
47
 
 
 
48
 
49
- ### -------------------------------- ###
50
- ### model training ###
51
- ### -------------------------------- ###
52
 
53
- # select features and prediction
54
- X = transformed_data.iloc[:, :-1] # all columns except last
55
- y = transformed_data.iloc[:, -1] # last column
56
 
57
- # split data into training and testing sets
58
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
 
 
 
 
59
 
60
- # train the model
61
- model = LogisticRegression(max_iter=1000)
 
62
  model.fit(X_train, y_train)
63
  y_pred = model.predict(X_test)
64
 
65
-
66
- ### -------------------------------- ###
67
- ### model evaluation ###
68
- ### -------------------------------- ###
69
-
70
  def get_feat():
71
  feats = [abs(x) for x in model.coef_[0]]
72
  max_val = max(feats)
73
  idx = feats.index(max_val)
74
  return data.columns[idx]
75
-
76
  acc = str(round(metrics.accuracy_score(y_test, y_pred) * 100, 1)) + "%"
77
  most_imp_feat = get_feat()
78
 
79
-
80
- ### ------------------------------- ###
81
- ### predictor function ###
82
- ### ------------------------------- ###
83
-
84
  def predict(*args):
85
- features = []
86
-
87
- # transform categorical input using our mappings
88
- for colname, arg in zip(data.columns[:-1], args):
89
- if arg is None:
90
- return "Please fill in all fields"
91
-
92
- if colname in cat_value_dicts:
93
- if arg not in cat_value_dicts[colname]:
94
- return f"Invalid value for {colname}"
95
- features.append(cat_value_dicts[colname][arg])
96
- else:
97
- try:
98
- features.append(float(arg))
99
- except:
100
- return f"Invalid numeric value for {colname}"
101
-
102
- # predict using the model
103
  try:
104
- new_input = [features]
105
- result = model.predict(new_input)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  return cat_value_dicts[final_colname][result[0]]
107
  except Exception as e:
108
  return f"Error making prediction: {str(e)}"
109
 
110
-
111
- ### ------------------------------- ###
112
- ### interface creation ###
113
- ### ------------------------------- ###
114
-
115
- block = gr.Blocks()
116
-
117
- with block:
118
  gr.Markdown("# Club Recommendation System")
119
  gr.Markdown("Take the quiz to get a personalized club recommendation using AI.")
120
 
121
  with gr.Row():
122
- with gr.Column(variant="panel"): # Changed from Box to Column with panel variant
123
  inputls = []
124
-
125
- # Create input components for each feature
126
- for colname in data.columns[:-1]: # Exclude the target column
127
  if colname in cat_value_dicts:
128
  choices = list(cat_value_dicts[colname].keys())
129
  inputls.append(gr.Dropdown(
@@ -146,27 +132,25 @@ with block:
146
  gr.Markdown("<br />")
147
 
148
  with gr.Row():
149
- with gr.Column(variant="panel"): # Changed from Box to Column
150
  gr.Markdown(f"### Model Accuracy\n{acc}")
151
- with gr.Column(variant="panel"): # Changed from Box to Column
152
  gr.Markdown(f"### Most Important Feature\n{most_imp_feat}")
153
 
154
  gr.Markdown("<br />")
155
 
156
- with gr.Column(variant="panel"): # Changed from Box to Column
157
- gr.Markdown('''⭐ Note that model accuracy is based on the uploaded data.csv and reflects how well
158
- the AI model can give correct recommendations for <em>that dataset</em>. Model accuracy
159
- and most important feature can be helpful for understanding how the model works, but
160
- <em>should not be considered absolute facts about the real world</em>.''')
161
 
162
- with gr.Column(variant="panel"): # Changed from Box to Column
163
  gr.Markdown("""
164
  # About the Club Recommendation System
165
 
166
  This system uses machine learning to suggest clubs based on your preferences and personality.
167
  Fill out the questionnaire on the left to get your personalized recommendation.
168
 
169
- The system takes into account factors like:
170
  - Your social preferences
171
  - Activity preferences
172
  - Personal strengths
 
 
 
 
 
1
  import gradio as gr
2
  import pandas as pd
3
  import numpy as np
 
5
  from sklearn.linear_model import LogisticRegression
6
  from sklearn import metrics
7
 
8
# --- Load the raw data -------------------------------------------------
print("Loading data...")
data = pd.read_csv('data.csv')
print(f"Initial shape: {data.shape}")

# Drop the first column (timestamp) and discard rows with missing values.
data = data.iloc[:, 1:].dropna()
print(f"Shape after removing timestamp and NaN: {data.shape}")

# --- Encode every column as numbers ------------------------------------
# transformed_data receives the numeric version of each column.
# cat_value_dicts maps column name -> {value: code} for categorical
# features, and column name -> {code: value} for the last (target) column
# so predictions can be decoded back to their original value.
transformed_data = pd.DataFrame()
cat_value_dicts = {}
final_colname = data.columns[-1]

print("\nProcessing columns:")
for colname in data.columns:
    column = data[colname]
    print(f"\nColumn: {colname}")
    print(f"Unique values: {column.unique()}")

    # Numeric columns need no encoding.
    if pd.api.types.is_numeric_dtype(column):
        transformed_data[colname] = column
        print("Numeric column - copied directly")
        continue

    # Categorical column: codes follow the sorted order of distinct values.
    unique_vals = sorted(column.dropna().unique())
    print(f"Categorical values: {unique_vals}")

    val_dict = {val: idx for idx, val in enumerate(unique_vals)}
    if colname == final_colname:
        # Store the reverse (code -> value) mapping for the target column.
        cat_value_dicts[colname] = dict(enumerate(unique_vals))
    else:
        cat_value_dicts[colname] = val_dict

    transformed_data[colname] = column.map(val_dict)
    print(f"Mapping created: {val_dict}")

print("\nChecking for NaN values in transformed data:")
print(transformed_data.isnull().sum())

# Drop any rows that still contain NaN after encoding.
transformed_data = transformed_data.dropna()
print(f"\nFinal transformed shape: {transformed_data.shape}")

# --- Train the model ---------------------------------------------------
# Every column but the last is a feature; the last column is the target.
X = transformed_data.iloc[:, :-1]
y = transformed_data.iloc[:, -1]

print(f"\nFeatures shape: {X.shape}")
print(f"Target shape: {y.shape}")

# Hand plain numpy arrays to scikit-learn.
X = X.to_numpy()
y = y.to_numpy()

# Hold out 25% of the rows for evaluation; fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)
model = LogisticRegression(max_iter=2000)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
 
 
 
 
 
 
73
  def get_feat():
74
  feats = [abs(x) for x in model.coef_[0]]
75
  max_val = max(feats)
76
  idx = feats.index(max_val)
77
  return data.columns[idx]
78
+
79
  acc = str(round(metrics.accuracy_score(y_test, y_pred) * 100, 1)) + "%"
80
  most_imp_feat = get_feat()
81
 
 
 
 
 
 
82
  def predict(*args):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  try:
84
+ features = []
85
+ for colname, arg in zip(data.columns[:-1], args):
86
+ if arg is None or pd.isna(arg):
87
+ return "Please fill in all fields"
88
+
89
+ if colname in cat_value_dicts:
90
+ if arg not in cat_value_dicts[colname]:
91
+ return f"Invalid value for {colname}"
92
+ features.append(cat_value_dicts[colname][arg])
93
+ else:
94
+ try:
95
+ features.append(float(arg))
96
+ except:
97
+ return f"Invalid numeric value for {colname}"
98
+
99
+ result = model.predict([features])
100
  return cat_value_dicts[final_colname][result[0]]
101
  except Exception as e:
102
  return f"Error making prediction: {str(e)}"
103
 
104
+ # Create interface
105
+ with gr.Blocks() as block:
 
 
 
 
 
 
106
  gr.Markdown("# Club Recommendation System")
107
  gr.Markdown("Take the quiz to get a personalized club recommendation using AI.")
108
 
109
  with gr.Row():
110
+ with gr.Column(variant="panel"):
111
  inputls = []
112
+ for colname in data.columns[:-1]:
 
 
113
  if colname in cat_value_dicts:
114
  choices = list(cat_value_dicts[colname].keys())
115
  inputls.append(gr.Dropdown(
 
132
  gr.Markdown("<br />")
133
 
134
  with gr.Row():
135
+ with gr.Column(variant="panel"):
136
  gr.Markdown(f"### Model Accuracy\n{acc}")
137
+ with gr.Column(variant="panel"):
138
  gr.Markdown(f"### Most Important Feature\n{most_imp_feat}")
139
 
140
  gr.Markdown("<br />")
141
 
142
+ with gr.Column(variant="panel"):
143
+ gr.Markdown('''⭐ Note that model accuracy is based on the training data and reflects how well
144
+ the AI model can give correct recommendations for <em>that dataset</em>.''')
 
 
145
 
146
+ with gr.Column(variant="panel"):
147
  gr.Markdown("""
148
  # About the Club Recommendation System
149
 
150
  This system uses machine learning to suggest clubs based on your preferences and personality.
151
  Fill out the questionnaire on the left to get your personalized recommendation.
152
 
153
+ The system considers:
154
  - Your social preferences
155
  - Activity preferences
156
  - Personal strengths