Spaces:

chiichann
/

student_performance_prediction_app

Sleeping

App Files Files Community

student_performance_prediction_app / app.py

chiichann

Update app.py

55b17cf verified 4 months ago

raw

history blame contribute delete

4.83 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	from sklearn.model_selection import train_test_split
	from xgboost import XGBClassifier
	from sklearn.preprocessing import StandardScaler
	import altair as alt

	# 🎓 App Title
	st.title("📊 Student Performance Prediction App")

	# 📚 Introductory markdown rendered directly beneath the title.
	about_text = """
	## About This App
	This application predicts whether a student will pass or fail based on their exam scores and demographic data.
	### Features:
	- Dataset Overview: View the number of students categorized by performance.
	- Model Evaluation: Check the model's accuracy on the test set.
	- Student Performance Prediction: Enter student details and get a prediction.

	The app uses Streamlit for the UI and XGBoostClassifier for predictions.
	"""
	st.write(about_text)

	# 📌 Load and preprocess data
	def load_data(file_path="exams.csv", pass_threshold=50):
	    """Load the exam CSV and turn it into a model-ready frame.

	    Steps, in order:
	      1. Read ``file_path`` with pandas.
	      2. Label each row ``Passed`` = 1 when the mean of the math, reading and
	         writing scores is at least ``pass_threshold``, else 0.
	      3. Drop the ``lunch``, ``race/ethnicity`` and ``gender`` columns.
	      4. One-hot encode ``parental level of education`` and
	         ``test preparation course``.
	      5. Standardize the three score columns with a ``StandardScaler``.

	    NOTE(review): the label is computed from the same three score columns
	    that remain in the frame as features, and the scaler is fitted on every
	    row before any train/test split happens downstream.

	    Args:
	        file_path: CSV to read. Defaults to ``"exams.csv"``.
	        pass_threshold: Minimum average score counted as a pass. Defaults
	            to 50.

	    Returns:
	        A ``(df, scaler, numerical_features)`` tuple: the processed frame,
	        the fitted scaler (needed to transform new inputs the same way) and
	        the list of column names that were standardized.
	    """
	    numerical_features = ["math score", "reading score", "writing score"]
	    cat_cols = ["parental level of education", "test preparation course"]

	    df = pd.read_csv(file_path)

	    # Derive the target straight from the row-wise mean; no temporary
	    # "Average Score" column has to be added and removed again.
	    average_score = df[numerical_features].mean(axis=1)
	    df["Passed"] = (average_score >= pass_threshold).astype(int)

	    # Drop unused demographic columns without mutating in place.
	    df = df.drop(columns=["lunch", "race/ethnicity", "gender"])

	    # Encode categorical variables as "<column>_<value>" indicator columns.
	    df = pd.get_dummies(df, columns=cat_cols)

	    # Standardize numerical features; keep the scaler for prediction time.
	    scaler = StandardScaler()
	    df[numerical_features] = scaler.fit_transform(df[numerical_features])

	    return df, scaler, numerical_features

	# Train the model
	def train_model(df, scaler=None):
	    """Fit an XGBoost classifier on an 80/20 split of ``df``.

	    Args:
	        df: Frame containing a ``Passed`` target column; every other column
	            is used as a feature.
	        scaler: Optional scaler object to hand back to the caller unchanged.
	            When omitted, the module-level ``scaler`` is returned if one
	            exists, otherwise ``None``.

	    Returns:
	        ``(model, X_test, y_test, feature_names, X_train, scaler)`` where
	        ``feature_names`` is ``X_train.columns``.
	    """
	    if scaler is None:
	        # The previous body returned a bare `scaler` that was neither a
	        # parameter nor a local, so it only worked when the caller had
	        # already created a module-level `scaler`. Keep that behavior for
	        # existing `train_model(df)` calls, but make the lookup explicit
	        # and avoid a NameError when no such global exists.
	        scaler = globals().get("scaler")

	    X = df.drop(columns=["Passed"])
	    y = df["Passed"]

	    # Fixed seed so the split (and the reported score) is reproducible.
	    X_train, X_test, y_train, y_test = train_test_split(
	        X, y, test_size=0.2, random_state=42
	    )

	    model = XGBClassifier(
	        n_estimators=1000,
	        learning_rate=0.03,
	        max_depth=10,
	        colsample_bytree=0.9,
	        subsample=0.9,
	        random_state=42,
	    )
	    model.fit(X_train, y_train)
	    return model, X_test, y_test, X_train.columns, X_train, scaler

	# Evaluate the model
	def evaluate_model(model, X_test, y_test):
	    """Return whatever ``model.score`` reports for the held-out split.

	    Args:
	        model: Fitted estimator exposing ``score(X, y)``.
	        X_test: Held-out features.
	        y_test: Held-out labels.

	    Returns:
	        The value of ``model.score(X_test, y_test)``. The caller treats it as
	        an accuracy fraction; presumably mean accuracy for the classifier
	        trained in this file — confirm against the estimator's docs.
	    """
	    return model.score(X_test, y_test)

	# Run the whole pipeline at module level: load and standardize the data,
	# fit the classifier, then score it on the held-out split.
	# Order matters: train_model() hands back the module-level `scaler`, which
	# only exists once the load_data() result has been unpacked on the next line.
	df, scaler, numerical_features = load_data()
	# `scaler` is rebound here to the value train_model() returns; X_train is not
	# referenced again in the visible code.
	model, X_test, y_test, feature_names, X_train, scaler = train_model(df)
	accuracy = evaluate_model(model, X_test, y_test)

	# 🏡 Streamlit Tabs
	tab1, tab2, tab3 = st.tabs(["💁 Dataset Overview", "📊 Model Performance", "🎓 Predict Performance"])

	# 📁 Tab 1: summary statistics plus a bar chart of the pass/fail split
	with tab1:
	    st.write("### Dataset Summary")
	    summary_table = df.describe()
	    st.write(summary_table)

	    st.write("### Distribution of Passed Students")
	    # Tally rows per label, then give the two resulting columns fixed names
	    # so the chart encoding below can refer to them.
	    pass_counts = df["Passed"].value_counts().reset_index()
	    pass_counts.columns = ["Passed", "Count"]

	    passed_axis = alt.X("Passed:N", title="Passed (0 = No, 1 = Yes)")
	    bars = alt.Chart(pass_counts).mark_bar()
	    chart = bars.encode(x=passed_axis, y="Count", color="Passed:N")
	    st.altair_chart(chart, use_container_width=True)

	# 📊 Tab 2: the single score computed on the held-out split
	with tab2:
	    st.write("### Model Evaluation")
	    st.write(f"✅ Model Accuracy: {accuracy*100:.2f}%")

	# 🎓 Tab 3: Predict Performance
	with tab3:
	    st.write("### Enter Student Details")

	    math_score = st.number_input("Math Score", min_value=0, max_value=100, value=70)
	    reading_score = st.number_input("Reading Score", min_value=0, max_value=100, value=70)
	    writing_score = st.number_input("Writing Score", min_value=0, max_value=100, value=70)
	    parent_education = st.selectbox("Parental Level of Education", ["Some high school", "High school", "Some college", "Associate's degree", "Bachelor's degree", "Master's degree"])
	    test_prep = st.selectbox("Test Preparation Course", ["None", "Completed"])

	    # Build a one-row frame with the raw scores, then standardize it with the
	    # same scaler that was fitted on the training data.
	    input_data = pd.DataFrame({
	        "math score": [math_score],
	        "reading score": [reading_score],
	        "writing score": [writing_score],
	    })
	    input_data[numerical_features] = scaler.transform(input_data[numerical_features])

	    # The select-box labels are title-cased for display, while the one-hot
	    # column names come from whatever spelling the CSV used. Matching them
	    # verbatim meant any casing difference produced a column that the
	    # reindex below threw away, so the selection never reached the model.
	    # Resolve each choice against the trained columns case-insensitively.
	    trained_columns = {str(col).casefold(): col for col in feature_names}
	    selections = {
	        "parental level of education": parent_education,
	        "test preparation course": test_prep,
	    }
	    for prefix, choice in selections.items():
	        matched = trained_columns.get(f"{prefix}_{choice}".casefold())
	        if matched is not None:
	            input_data[matched] = 1
	        # No match means the category has no indicator column in the
	        # trained features; its encoding is then all zeros.

	    # Put the columns in the order used for training; every indicator
	    # column that was not set above is filled with 0.
	    input_data = input_data.reindex(columns=feature_names, fill_value=0)

	    if st.button("Predict"):
	        prediction = model.predict(input_data)[0]
	        result = "Pass" if prediction == 1 else "Fail"
	        st.subheader(f"Prediction: {result}")