"""Club recommendation quiz.

Loads ``data.csv``, integer-encodes its categorical columns, fits a logistic
regression that predicts the last column from the others, and serves a Gradio
form that returns a prediction for the values the user enters.
"""
import gradio as gr
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# NOTE(review): the accuracy shown in the UI is computed on the held-out 25%
# test split below, not on the training rows; the wording of this note is kept
# as it was and may deserve a follow-up.
_ACCURACY_NOTE = (
    "⭐ Note that model accuracy is based on the training data and reflects "
    "how well the AI model can give correct recommendations for that dataset."
)

# Line breaks are explicit so the heading and the bullet list render as
# separate markdown elements.
_ABOUT_MARKDOWN = (
    "# About the Club Recommendation System\n"
    "\n"
    "This system uses machine learning to suggest clubs based on your "
    "preferences and personality. Fill out the questionnaire on the left to "
    "get your personalized recommendation.\n"
    "\n"
    "The system considers:\n"
    "- Your social preferences\n"
    "- Activity preferences\n"
    "- Personal strengths\n"
    "- Time commitment\n"
    "\n"
    "Remember that this is just a suggestion - you should always choose the "
    "club that interests you most!\n"
)

# Load dataset
print("Loading data...")
data = pd.read_csv('data.csv')
print(f"Initial shape: {data.shape}")

# Drop the first column (described as a timestamp by the original author) and
# every row that has a missing value.
data = data.iloc[:, 1:]
data = data.dropna()
print(f"Shape after removing timestamp and NaN: {data.shape}")

# Build an all-numeric copy of the data.  ``cat_value_dicts`` keeps, per
# categorical column, the mapping needed at prediction time:
#   * feature columns: value -> integer code (to encode user input)
#   * target column:   integer code -> value (to decode the model output)
transformed_data = pd.DataFrame()
cat_value_dicts = {}
final_colname = data.columns[-1]

print("\nProcessing columns:")
for colname in data.columns:
    print(f"\nColumn: {colname}")
    print(f"Unique values: {data[colname].unique()}")

    if pd.api.types.is_numeric_dtype(data[colname]):
        transformed_data[colname] = data[colname]
        print("Numeric column - copied directly")
        continue

    # Categorical column: codes follow the sorted order of the unique values.
    unique_vals = sorted(data[colname].dropna().unique())
    print(f"Categorical values: {unique_vals}")

    val_dict = {val: idx for idx, val in enumerate(unique_vals)}
    if colname == final_colname:
        # The target is decoded after prediction, so store the inverse mapping.
        cat_value_dicts[colname] = dict(enumerate(unique_vals))
    else:
        cat_value_dicts[colname] = val_dict

    transformed_data[colname] = data[colname].map(val_dict)
    print(f"Mapping created: {val_dict}")

print("\nChecking for NaN values in transformed data:")
print(transformed_data.isnull().sum())

# Remove any remaining NaN values
transformed_data = transformed_data.dropna()
print(f"\nFinal transformed shape: {transformed_data.shape}")

# Separate features (all but the last column) and target (last column).
X = transformed_data.iloc[:, :-1]
y = transformed_data.iloc[:, -1]
print(f"\nFeatures shape: {X.shape}")
print(f"Target shape: {y.shape}")

# Convert to numpy arrays
X = X.to_numpy()
y = y.to_numpy()

# Split and train
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)
model = LogisticRegression(max_iter=2000)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)


def get_feat():
    """Return the name of the feature with the largest coefficient magnitude.

    ``model.coef_`` holds one row per class for a multiclass target (a single
    row for a binary one).  The absolute coefficients are averaged over all
    rows so that every class contributes to the ranking instead of only the
    first one; for a binary target this is identical to inspecting row 0.

    Returns:
        The column label from ``data`` at the index of the winning feature.
    """
    importance = np.abs(model.coef_).mean(axis=0)
    # Feature i of X is column i of ``data`` because the target is last.
    return data.columns[int(importance.argmax())]


acc = f"{round(metrics.accuracy_score(y_test, y_pred) * 100, 1)}%"
most_imp_feat = get_feat()


def predict(*args):
    """Encode the form values, run the model, and return the recommendation.

    Args:
        *args: One value per feature column, in the order of
            ``data.columns[:-1]`` (the order the input widgets are created).

    Returns:
        The decoded target value on success, or a human-readable message when
        a field is empty, a value is not recognised, or prediction fails.
        Nothing is raised: this is the UI boundary, so errors become text.
    """
    try:
        features = []
        for colname, arg in zip(data.columns[:-1], args):
            if arg is None or pd.isna(arg):
                return "Please fill in all fields"

            if colname in cat_value_dicts:
                if arg not in cat_value_dicts[colname]:
                    return f"Invalid value for {colname}"
                features.append(cat_value_dicts[colname][arg])
            else:
                try:
                    features.append(float(arg))
                except (TypeError, ValueError):
                    return f"Invalid numeric value for {colname}"

        result = model.predict([features])

        # A numeric target column has no label mapping; show the raw value.
        label_lookup = cat_value_dicts.get(final_colname)
        if label_lookup is None:
            return str(result[0])
        return label_lookup[result[0]]
    except Exception as e:
        return f"Error making prediction: {str(e)}"


# Create interface
# NOTE(review): the nesting below is reconstructed from the statement order
# (quiz panel on the left, "About" panel on the right) - confirm the layout.
with gr.Blocks() as block:
    gr.Markdown("# Club Recommendation System")
    gr.Markdown("Take the quiz to get a personalized club recommendation using AI.")

    with gr.Row():
        with gr.Column(variant="panel"):
            # One input widget per feature column: a dropdown for categorical
            # columns, a number box for numeric ones.
            inputls = []
            for colname in data.columns[:-1]:
                if colname in cat_value_dicts:
                    choices = list(cat_value_dicts[colname].keys())
                    inputls.append(gr.Dropdown(
                        choices=choices,
                        label=colname,
                        type="value"
                    ))
                else:
                    inputls.append(gr.Number(label=colname))

            # Spacer.  NOTE(review): the literal is a bare newline; it was
            # presumably a "<br>" tag originally - confirm.
            gr.Markdown("\n")

            submit = gr.Button("Click to see your personalized result!", variant="primary")
            gr.Markdown("\n")

            output = gr.Textbox(
                label="Your recommendation:",
                placeholder="your recommendation will appear here"
            )
            submit.click(fn=predict, inputs=inputls, outputs=output)
            gr.Markdown("\n")

            with gr.Row():
                with gr.Column(variant="panel"):
                    gr.Markdown(f"### Model Accuracy\n{acc}")
                with gr.Column(variant="panel"):
                    gr.Markdown(f"### Most Important Feature\n{most_imp_feat}")
            gr.Markdown("\n")

            with gr.Column(variant="panel"):
                gr.Markdown(_ACCURACY_NOTE)

        with gr.Column(variant="panel"):
            gr.Markdown(_ABOUT_MARKDOWN)

block.launch()