Spaces:

aiEDUcurriculum
/

introtoAI-clubs-project

Running

App Files Files Community

introtoAI-clubs-project / app.py

aiEDUcurriculum

Update app.py

7273ceb verified 5 months ago

raw

history blame contribute delete

5.76 kB

	import gradio as gr
	import pandas as pd
	import numpy as np
	from sklearn.model_selection import train_test_split
	from sklearn.linear_model import LogisticRegression
	from sklearn import metrics

	# ---------------------------------------------------------------
	# Data preparation and model training (top-level; runs on import).
	# ---------------------------------------------------------------

	# Read the survey export from disk.
	print("Loading data...")
	data = pd.read_csv('data.csv')
	print(f"Initial shape: {data.shape}")

	# Discard the first column (the timestamp) and every row with a missing value.
	data = data.iloc[:, 1:].dropna()
	print(f"Shape after removing timestamp and NaN: {data.shape}")

	# transformed_data: numeric copy of `data` that is fed to the model.
	# cat_value_dicts:  lookup table per categorical column --
	#                   label -> code for feature columns,
	#                   code -> label for the target column (used to decode predictions).
	transformed_data = pd.DataFrame()
	cat_value_dicts = {}
	final_colname = data.columns[-1]

	print("\nProcessing columns:")
	for colname in data.columns:
	    column = data[colname]
	    print(f"\nColumn: {colname}")
	    print(f"Unique values: {column.unique()}")

	    if not pd.api.types.is_numeric_dtype(column):
	        # Categorical column: number the labels in sorted order.
	        unique_vals = sorted(column.dropna().unique())
	        print(f"Categorical values: {unique_vals}")

	        val_dict = {label: code for code, label in enumerate(unique_vals)}
	        if colname == final_colname:
	            # The target keeps the reverse table so a predicted code can be named.
	            cat_value_dicts[colname] = dict(enumerate(unique_vals))
	        else:
	            cat_value_dicts[colname] = val_dict

	        transformed_data[colname] = column.map(val_dict)
	        print(f"Mapping created: {val_dict}")
	    else:
	        # Numeric columns pass through unchanged and get no lookup table.
	        transformed_data[colname] = column
	        print("Numeric column - copied directly")

	print("\nChecking for NaN values in transformed data:")
	print(transformed_data.isnull().sum())

	# Drop any rows that still contain NaN after encoding.
	transformed_data = transformed_data.dropna()
	print(f"\nFinal transformed shape: {transformed_data.shape}")

	# Every column except the last is a feature; the last column is the target.
	X = transformed_data.iloc[:, :-1]
	y = transformed_data.iloc[:, -1]
	print(f"\nFeatures shape: {X.shape}")
	print(f"Target shape: {y.shape}")

	# Hand plain numpy arrays to scikit-learn.
	X, y = X.to_numpy(), y.to_numpy()

	# Hold out 25% of the rows for evaluation (fixed seed), then fit the classifier.
	X_train, X_test, y_train, y_test = train_test_split(
	    X, y, test_size=0.25, random_state=42
	)
	model = LogisticRegression(max_iter=2000).fit(X_train, y_train)
	y_pred = model.predict(X_test)

	def get_feat():
	    """Return the name of the feature with the largest coefficient magnitude.

	    ``model.coef_`` holds one row of coefficients per class for multiclass
	    problems and a single row for binary ones, so the absolute values are
	    averaged over all rows rather than reading only the first class.
	    For a single-row ``coef_`` this gives the same answer as inspecting
	    ``coef_[0]`` directly.

	    Returns:
	        The label in ``data.columns`` at the position of the strongest feature.
	    """
	    # atleast_2d lets the single-row and multi-row cases share one code path.
	    magnitudes = np.abs(np.atleast_2d(model.coef_)).mean(axis=0)
	    # argmax yields the first maximum, the same tie-break as list.index().
	    return data.columns[int(magnitudes.argmax())]

	# Figures displayed in the UI: the name of the most influential feature and
	# the accuracy on the held-out split as a percentage string (one decimal).
	most_imp_feat = get_feat()
	acc = f"{round(metrics.accuracy_score(y_test, y_pred) * 100, 1)}%"

	def predict(*args):
	    """Turn questionnaire answers into a recommendation string.

	    Args:
	        *args: One answer per feature column, in the order of
	            ``data.columns[:-1]``. Answers for categorical columns must be
	            keys of ``cat_value_dicts[colname]``; all other answers must be
	            convertible to ``float``.

	    Returns:
	        The decoded label predicted by ``model``. If an answer is missing or
	        invalid, or the prediction itself fails, a human-readable message is
	        returned instead (this function never raises ``Exception`` subclasses
	        to the caller).
	    """
	    try:
	        features = []
	        for colname, arg in zip(data.columns[:-1], args):
	            if arg is None or pd.isna(arg):
	                return "Please fill in all fields"

	            if colname in cat_value_dicts:
	                # Categorical answer: translate the label to its integer code.
	                if arg not in cat_value_dicts[colname]:
	                    return f"Invalid value for {colname}"
	                features.append(cat_value_dicts[colname][arg])
	            else:
	                try:
	                    features.append(float(arg))
	                except (TypeError, ValueError, OverflowError):
	                    # Only the failures float() itself can raise; anything
	                    # else falls through to the outer handler.
	                    return f"Invalid numeric value for {colname}"

	        predicted = model.predict([features])[0]

	        # Numeric columns get no entry in cat_value_dicts, so a numeric target
	        # has no code -> label table; show the predicted class value as-is.
	        labels = cat_value_dicts.get(final_colname)
	        if labels is None:
	            return str(predicted)
	        return labels[predicted]
	    except Exception as e:
	        # UI boundary: report the problem in the output box instead of crashing.
	        return f"Error making prediction: {str(e)}"

	# Build the Gradio UI and start the app.
	# NOTE(review): the nesting below was reconstructed from a copy whose
	# indentation had been stripped -- confirm the layout against the running app.
	with gr.Blocks() as block:
	    gr.Markdown("# Club Recommendation System")
	    gr.Markdown("Take the quiz to get a personalized club recommendation using AI.")

	    with gr.Row():
	        # First panel: questionnaire, result box and model statistics.
	        with gr.Column(variant="panel"):
	            # One input widget per feature column, in column order, so the
	            # values reach predict() in the order it expects.
	            inputls = []
	            for colname in data.columns[:-1]:
	                if colname not in cat_value_dicts:
	                    widget = gr.Number(label=colname)
	                else:
	                    widget = gr.Dropdown(
	                        choices=list(cat_value_dicts[colname]),
	                        label=colname,
	                        type="value",
	                    )
	                inputls.append(widget)
	                gr.Markdown("<br />")

	            submit = gr.Button("Click to see your personalized result!", variant="primary")
	            gr.Markdown("<br />")
	            output = gr.Textbox(
	                label="Your recommendation:",
	                placeholder="your recommendation will appear here",
	            )

	            # Wire the button to the predictor.
	            submit.click(fn=predict, inputs=inputls, outputs=output)
	            gr.Markdown("<br />")

	            # Side-by-side statistics computed at start-up.
	            with gr.Row():
	                with gr.Column(variant="panel"):
	                    gr.Markdown(f"### Model Accuracy\n{acc}")
	                with gr.Column(variant="panel"):
	                    gr.Markdown(f"### Most Important Feature\n{most_imp_feat}")

	            gr.Markdown("<br />")

	            with gr.Column(variant="panel"):
	                gr.Markdown('''⭐ Note that model accuracy is based on the training data and reflects how well
	the AI model can give correct recommendations for <em>that dataset</em>.''')

	        # Second panel: static description of the app.
	        with gr.Column(variant="panel"):
	            gr.Markdown("""
	# About the Club Recommendation System

	This system uses machine learning to suggest clubs based on your preferences and personality.
	Fill out the questionnaire on the left to get your personalized recommendation.

	The system considers:
	- Your social preferences
	- Activity preferences
	- Personal strengths
	- Time commitment

	Remember that this is just a suggestion - you should always choose the club that interests you most!
	""")

	block.launch()