|
import gradio as gr |
|
import pandas as pd |
|
import numpy as np |
|
from sklearn.model_selection import train_test_split |
|
from sklearn.linear_model import LogisticRegression |
|
from sklearn import metrics |
|
|
|
|
|
# Read the raw table from ``data.csv`` (resolved against the current working
# directory) and report its size before any cleaning.
print("Loading data...")
data = pd.read_csv("data.csv")
print(f"Initial shape: {data.shape}")

# Keep everything except the first column (the log message below calls it the
# timestamp), then discard every row that has at least one missing value.
data = data.iloc[:, 1:].dropna()
print(f"Shape after removing timestamp and NaN: {data.shape}")
|
|
|
|
|
# Build a numeric copy of ``data`` column by column.
#   transformed_data : the encoded frame used for training
#   cat_value_dicts  : per-column lookup tables for the non-numeric columns
#                      - feature columns: original value -> integer code
#                      - last column:     integer code   -> original value
#   final_colname    : name of the last column of ``data``
transformed_data = pd.DataFrame()
cat_value_dicts = {}
final_colname = data.columns[-1]

print("\nProcessing columns:")
for colname in data.columns:
    column = data[colname]
    print(f"\nColumn: {colname}")
    print(f"Unique values: {column.unique()}")

    if pd.api.types.is_numeric_dtype(column):
        # Numeric columns need no encoding.
        transformed_data[colname] = column
        print("Numeric column - copied directly")
    else:
        # Codes are assigned by position in the sorted list of distinct values.
        unique_vals = sorted(column.dropna().unique())
        print(f"Categorical values: {unique_vals}")

        val_dict = {val: idx for idx, val in enumerate(unique_vals)}
        if colname == final_colname:
            # Store the reverse direction for the last column so that an
            # integer code can be turned back into the original value.
            cat_value_dicts[colname] = dict(enumerate(unique_vals))
        else:
            cat_value_dicts[colname] = val_dict

        transformed_data[colname] = column.map(val_dict)
        print(f"Mapping created: {val_dict}")

print("\nChecking for NaN values in transformed data:")
print(transformed_data.isnull().sum())

# Remove any rows that still contain NaN after encoding.
transformed_data = transformed_data.dropna()
print(f"\nFinal transformed shape: {transformed_data.shape}")
|
|
|
|
|
# Every column but the last is a feature; the last column is the target.
X, y = transformed_data.iloc[:, :-1], transformed_data.iloc[:, -1]

print(f"\nFeatures shape: {X.shape}")
print(f"Target shape: {y.shape}")

# Hand plain NumPy arrays to scikit-learn.
X, y = X.to_numpy(), y.to_numpy()

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

# Fit the classifier on the training rows and predict the held-out rows.
model = LogisticRegression(max_iter=2000)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
|
|
|
def get_feat(coefficients=None, columns=None):
    """Return the name of the feature with the largest coefficient magnitude.

    The magnitude of a feature is the sum of the absolute values of its
    coefficients over all rows of the coefficient matrix. A multiclass
    logistic regression stores one row per class, so looking at the first row
    only would rank features by their effect on a single class. For a binary
    model (a single row) the result is the same as inspecting that row.

    Args:
        coefficients: 2-D array-like of coefficients, one row per class and
            one column per feature. Defaults to ``model.coef_`` of the
            module-level fitted model.
        columns: Indexable collection of column names whose leading entries
            are in the same order as the coefficient columns. Defaults to
            ``data.columns``.

    Returns:
        The entry of ``columns`` at the position of the strongest feature.
        Ties resolve to the first such feature.

    Raises:
        ValueError: If ``coefficients`` contains no features.
    """
    if coefficients is None:
        coefficients = model.coef_
    if columns is None:
        columns = data.columns

    # atleast_2d lets a flat coefficient vector be treated as a single row.
    magnitudes = np.abs(np.atleast_2d(np.asarray(coefficients, dtype=float)))
    importance = magnitudes.sum(axis=0)

    # argmax returns the first maximum, matching list.index() on ties.
    return columns[int(np.argmax(importance))]
|
|
|
# Accuracy of the held-out predictions as a percentage string with one
# decimal place (for example "87.5%").
acc = f"{round(metrics.accuracy_score(y_test, y_pred) * 100, 1)!s}%"

# Column name reported by get_feat() for the fitted model.
most_imp_feat = get_feat()
|
|
|
def predict(*args):
    """Turn one set of questionnaire answers into a recommendation string.

    Args:
        *args: One answer per feature column, in the order of
            ``data.columns[:-1]``. Answers for encoded columns must be one of
            the keys in ``cat_value_dicts[column]``; answers for the other
            columns must be convertible with ``float()``.

    Returns:
        The predicted label decoded through ``cat_value_dicts[final_colname]``
        (or the raw prediction as a string when the target column has no
        decoding table). When the input is incomplete or invalid, or the
        prediction fails, a human-readable message is returned instead; this
        function never raises an ``Exception`` to its caller.
    """
    # This function is the UI callback boundary, so any unexpected failure is
    # reported as text rather than propagated.
    try:
        feature_columns = data.columns[:-1]

        # zip() would silently ignore a mismatch in length, handing the model
        # a feature vector of the wrong size.
        if len(args) != len(feature_columns):
            return "Please fill in all fields"

        features = []
        for colname, arg in zip(feature_columns, args):
            if arg is None or pd.isna(arg):
                return "Please fill in all fields"

            encoding = cat_value_dicts.get(colname)
            if encoding is not None:
                # Encoded column: translate the answer to its integer code.
                if arg not in encoding:
                    return f"Invalid value for {colname}"
                features.append(encoding[arg])
            else:
                try:
                    features.append(float(arg))
                except (TypeError, ValueError):
                    # Only conversion failures are input errors; a bare
                    # ``except`` would also trap KeyboardInterrupt/SystemExit.
                    return f"Invalid numeric value for {colname}"

        predicted = model.predict([features])[0]

        # A numeric target column is copied without a decoding table, so fall
        # back to the raw predicted value in that case.
        labels = cat_value_dicts.get(final_colname)
        if labels is None:
            return str(predicted)
        return labels[predicted]
    except Exception as e:
        return f"Error making prediction: {str(e)}"
|
|
|
|
|
# Gradio page: questionnaire and model statistics in one panel, descriptive
# text in the other.
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation had been stripped - confirm the layout against the running app.
with gr.Blocks() as block:
    # Page header.
    gr.Markdown("# Club Recommendation System")
    gr.Markdown("Take the quiz to get a personalized club recommendation using AI.")

    with gr.Row():
        # Questionnaire, result box and model statistics.
        with gr.Column(variant="panel"):
            inputls = []
            for colname in data.columns[:-1]:
                # Encoded columns get a dropdown of their known values; every
                # other column gets a numeric field.
                if colname not in cat_value_dicts:
                    field = gr.Number(label=colname)
                else:
                    field = gr.Dropdown(
                        choices=list(cat_value_dicts[colname]),
                        label=colname,
                        type="value",
                    )
                inputls.append(field)
                gr.Markdown("<br />")

            submit = gr.Button("Click to see your personalized result!", variant="primary")
            gr.Markdown("<br />")
            output = gr.Textbox(
                label="Your recommendation:",
                placeholder="your recommendation will appear here",
            )

            # Send the answers, in column order, to predict() and show its
            # return value in the textbox.
            submit.click(fn=predict, inputs=inputls, outputs=output)
            gr.Markdown("<br />")

            with gr.Row():
                with gr.Column(variant="panel"):
                    gr.Markdown(f"### Model Accuracy\n{acc}")
                with gr.Column(variant="panel"):
                    gr.Markdown(f"### Most Important Feature\n{most_imp_feat}")

            gr.Markdown("<br />")

            # NOTE(review): ``acc`` is computed from y_test / y_pred, i.e. the
            # held-out split, while the text below says "training data" -
            # consider rewording.
            with gr.Column(variant="panel"):
                gr.Markdown('''⭐ Note that model accuracy is based on the training data and reflects how well
                the AI model can give correct recommendations for <em>that dataset</em>.''')

        # Static description of the app.
        with gr.Column(variant="panel"):
            gr.Markdown("""
            # About the Club Recommendation System

            This system uses machine learning to suggest clubs based on your preferences and personality.
            Fill out the questionnaire on the left to get your personalized recommendation.

            The system considers:
            - Your social preferences
            - Activity preferences
            - Personal strengths
            - Time commitment

            Remember that this is just a suggestion - you should always choose the club that interests you most!
            """)

# Start the web server for the page defined above.
block.launch()