Spaces:

aiEDUcurriculum
/

introtoAI-clubs-project

Running

App Files Files Community

aiEDUcurriculum committed on 27 days ago

Commit

fcd9be8

verified ·

1 Parent(s): 1597643

Update app.py

Browse files

Files changed (1) hide show

app.py +120 -115

app.py CHANGED Viewed

@@ -3,7 +3,6 @@
 ### ----------------------------- ###
 import gradio as gr
 import pandas as pd
 import numpy as np
 from sklearn.model_selection import train_test_split
@@ -16,158 +15,164 @@ from sklearn import metrics
 ### ------------------------------ ###
 # load dataset
-uncleaned_data = pd.read_csv('data.csv')
-# remove timestamp from dataset (always first column)
-uncleaned_data = uncleaned_data.iloc[: , 1:]
-data = pd.DataFrame()
 # keep track of which columns are categorical and what
 # those columns' value mappings are
-# structure: {colname1: {...}, colname2: {...} }
 cat_value_dicts = {}
-final_colname = uncleaned_data.columns[len(uncleaned_data.columns) - 1]
 # for each column...
-for (colname, colval) in uncleaned_data.iteritems():
-  # check if col is already a number; if so, add col directly
-  # to new dataframe and skip to next column
-  if isinstance(colval.values[0], (np.integer, float)):
-    data[colname] = uncleaned_data[colname].copy()
-    continue
-  # structure: {0: "lilac", 1: "blue", ...}
-  new_dict = {}
-  val = 0 # first index per column
-  transformed_col_vals = [] # new numeric datapoints
-  # if not, for each item in that column...
-  for (row, item) in enumerate(colval.values):
-    # if item is not in this col's dict...
-    if item not in new_dict:
-      new_dict[item] = val
-      val += 1
-    # then add numerical value to transformed dataframe
-    transformed_col_vals.append(new_dict[item])
-  # reverse dictionary only for final col (0, 1) => (vals)
-  if colname == final_colname:
-    new_dict = {value : key for (key, value) in new_dict.items()}
-  cat_value_dicts[colname] = new_dict
-  data[colname] = transformed_col_vals
 ### -------------------------------- ###
 ###           model training         ###
 ### -------------------------------- ###
-# select features and prediction; automatically selects last column as prediction
-cols = len(data.columns)
-num_features = cols - 1
-x = data.iloc[: , :num_features]
-y = data.iloc[: , num_features:]
 # split data into training and testing sets
-x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)
-# instantiate the model (using default parameters)
-model = LogisticRegression()
-model.fit(x_train, y_train.values.ravel())
-y_pred = model.predict(x_test)
 ### -------------------------------- ###
-###        article generation        ###
 ### -------------------------------- ###
-# borrow file reading function from reader.py
 def get_feat():
-  feats = [abs(x) for x in model.coef_[0]]
-  max_val = max(feats)
-  idx = feats.index(max_val)
-  return data.columns[idx]
 acc = str(round(metrics.accuracy_score(y_test, y_pred) * 100, 1)) + "%"
 most_imp_feat = get_feat()
-# info = get_article(acc, most_imp_feat)
 ### ------------------------------- ###
-###        interface creation       ###
 ### ------------------------------- ###
-# predictor for generic number of features
-def general_predictor(*args):
-  features = []
-  # transform categorical input
-  for colname, arg in zip(data.columns, args):
-    if (colname in cat_value_dicts):
-      features.append(cat_value_dicts[colname][arg])
-    else:
-      features.append(arg)
-  # predict single datapoint
-  new_input = [features]
-  result = model.predict(new_input)
-  return cat_value_dicts[final_colname][result[0]]
-# add data labels to replace those lost via star-args
 block = gr.Blocks()
-with open('info.md') as f:
-  with block:
-    gr.Markdown(f.readline())
-    gr.Markdown('Take the quiz to get a personalized recommendation using AI.')
     with gr.Row():
-      with gr.Box():
-        inputls = []
-        for colname in data.columns:
-          # skip last column
-          if colname == final_colname:
-            continue
-          # access categories dict if data is categorical
-          # otherwise, just use a number input
-          if colname in cat_value_dicts:
-            radio_options = list(cat_value_dicts[colname].keys())
-            inputls.append(gr.Dropdown(choices=radio_options, type="value", label=colname))
-          else:
-            # add numerical input
-            inputls.append(gr.Number(label=colname))
-          gr.Markdown("<br />")
-        submit = gr.Button("Click to see your personalized result!", variant="primary")
-        gr.Markdown("<br />")
-        output = gr.Textbox(label="Your recommendation:", placeholder="your recommendation will appear here")
-        submit.click(fn=general_predictor, inputs=inputls, outputs=output)
-        gr.Markdown("<br />")
-        with gr.Row():
-          with gr.Box():
-            gr.Markdown(f"<h3>Accuracy: </h3>{acc}")
-          with gr.Box():
-            gr.Markdown(f"<h3>Most important feature: </h3>{most_imp_feat}")
-        gr.Markdown("<br />")
-        with gr.Box():
-          gr.Markdown('''⭐ Note that model accuracy is based on the uploaded data.csv and reflects how well the AI model can give correct recommendations for <em>that dataset</em>. Model accuracy and most important feature can be helpful for understanding how the model works, but <em>should not be considered absolute facts about the real world</em>.''')
-      with gr.Box():
-        with open('info.md') as f:
-          f.readline()
-          gr.Markdown(f.read())
-# show the interface
 block.launch()

 ### ----------------------------- ###
 import gradio as gr
 import pandas as pd
 import numpy as np
 from sklearn.model_selection import train_test_split
 ### ------------------------------ ###
 # load dataset
+data = pd.read_csv('data.csv')
+# remove timestamp from dataset (first column)
+data = data.iloc[:, 1:]
+# create a copy for transformed data
+transformed_data = pd.DataFrame()
 # keep track of which columns are categorical and what
 # those columns' value mappings are
 cat_value_dicts = {}
+final_colname = data.columns[-1]  # club recommendation
 # for each column...
+for colname in data.columns:
+    if pd.api.types.is_numeric_dtype(data[colname]):
+        transformed_data[colname] = data[colname].copy()
+        continue
+    # Create mapping for categorical variables
+    unique_vals = data[colname].unique()
+    val_dict = {val: idx for idx, val in enumerate(sorted(unique_vals))}
+    # If it's the target column, store the reverse mapping
+    if colname == final_colname:
+        val_dict = {idx: val for val, idx in val_dict.items()}
+    cat_value_dicts[colname] = val_dict
+    transformed_data[colname] = data[colname].map(val_dict)
 ### -------------------------------- ###
 ###           model training         ###
 ### -------------------------------- ###
+# select features and prediction
+X = transformed_data.iloc[:, :-1]  # all columns except last
+y = transformed_data.iloc[:, -1]   # last column
 # split data into training and testing sets
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
+# train the model
+model = LogisticRegression(max_iter=1000)
+model.fit(X_train, y_train)
+y_pred = model.predict(X_test)
 ### -------------------------------- ###
+###        model evaluation         ###
 ### -------------------------------- ###
 def get_feat():
+    feats = [abs(x) for x in model.coef_[0]]
+    max_val = max(feats)
+    idx = feats.index(max_val)
+    return data.columns[idx]
 acc = str(round(metrics.accuracy_score(y_test, y_pred) * 100, 1)) + "%"
 most_imp_feat = get_feat()
 ### ------------------------------- ###
+###        predictor function      ###
 ### ------------------------------- ###
+def predict(*args):
+    features = []
+    # transform categorical input using our mappings
+    for colname, arg in zip(data.columns[:-1], args):
+        if arg is None:
+            return "Please fill in all fields"
+        if colname in cat_value_dicts:
+            if arg not in cat_value_dicts[colname]:
+                return f"Invalid value for {colname}"
+            features.append(cat_value_dicts[colname][arg])
+        else:
+            try:
+                features.append(float(arg))
+            except:
+                return f"Invalid numeric value for {colname}"
+    # predict using the model
+    try:
+        new_input = [features]
+        result = model.predict(new_input)
+        return cat_value_dicts[final_colname][result[0]]
+    except Exception as e:
+        return f"Error making prediction: {str(e)}"
+### ------------------------------- ###
+###        interface creation      ###
+### ------------------------------- ###
 block = gr.Blocks()
+with block:
+    gr.Markdown("# Club Recommendation System")
+    gr.Markdown("Take the quiz to get a personalized club recommendation using AI.")
     with gr.Row():
+        with gr.Column(variant="panel"):  # Changed from Box to Column with panel variant
+            inputls = []
+            # Create input components for each feature
+            for colname in data.columns[:-1]:  # Exclude the target column
+                if colname in cat_value_dicts:
+                    choices = list(cat_value_dicts[colname].keys())
+                    inputls.append(gr.Dropdown(
+                        choices=choices,
+                        label=colname,
+                        type="value"
+                    ))
+                else:
+                    inputls.append(gr.Number(label=colname))
+                gr.Markdown("<br />")
+            submit = gr.Button("Click to see your personalized result!", variant="primary")
+            gr.Markdown("<br />")
+            output = gr.Textbox(
+                label="Your recommendation:",
+                placeholder="your recommendation will appear here"
+            )
+            submit.click(fn=predict, inputs=inputls, outputs=output)
+            gr.Markdown("<br />")
+            with gr.Row():
+                with gr.Column(variant="panel"):  # Changed from Box to Column
+                    gr.Markdown(f"### Model Accuracy\n{acc}")
+                with gr.Column(variant="panel"):  # Changed from Box to Column
+                    gr.Markdown(f"### Most Important Feature\n{most_imp_feat}")
+            gr.Markdown("<br />")
+            with gr.Column(variant="panel"):  # Changed from Box to Column
+                gr.Markdown('''⭐ Note that model accuracy is based on the uploaded data.csv and reflects how well
+                           the AI model can give correct recommendations for <em>that dataset</em>. Model accuracy
+                           and most important feature can be helpful for understanding how the model works, but
+                           <em>should not be considered absolute facts about the real world</em>.''')
+        with gr.Column(variant="panel"):  # Changed from Box to Column
+            gr.Markdown("""
+            # About the Club Recommendation System
+            This system uses machine learning to suggest clubs based on your preferences and personality.
+            Fill out the questionnaire on the left to get your personalized recommendation.
+            The system takes into account factors like:
+            - Your social preferences
+            - Activity preferences
+            - Personal strengths
+            - Time commitment
+            Remember that this is just a suggestion - you should always choose the club that interests you most!
+            """)
 block.launch()