|
import streamlit as st |
|
import pandas as pd |
|
import numpy as np |
|
from sklearn.model_selection import train_test_split |
|
from xgboost import XGBClassifier |
|
from sklearn.preprocessing import StandardScaler |
|
import altair as alt |
|
|
|
|
|
# Page title.
# NOTE(review): the leading "π" looks like a mis-decoded emoji - confirm the
# intended glyph before changing it.
st.title("π Student Performance Prediction App")

# Introductory markdown rendered directly below the title.
about_markdown = """
## About This App
This application predicts whether a student will pass or fail based on their exam scores and demographic data.
### Features:
- **Dataset Overview**: View the number of students categorized by performance.
- **Model Evaluation**: Check the model's accuracy on the test set.
- **Student Performance Prediction**: Enter student details and get a prediction.

The app uses **Streamlit** for the UI and **XGBoostClassifier** for predictions.
"""
st.write(about_markdown)
|
|
|
|
|
def load_data(file_path="exams.csv", pass_threshold=50):
    """Load the exam CSV and turn it into a model-ready feature frame.

    Steps, in order:
      1. Read ``file_path`` with pandas.
      2. Add the binary target ``Passed`` (1 when the mean of the three exam
         scores is at least ``pass_threshold``, else 0).
      3. Drop the ``lunch``, ``race/ethnicity`` and ``gender`` columns.
      4. One-hot encode ``parental level of education`` and
         ``test preparation course``.
      5. Standardise the three score columns with a freshly fitted
         ``StandardScaler``.

    Args:
        file_path: Path of the CSV file to read. Defaults to ``"exams.csv"``.
        pass_threshold: Minimum average score counted as a pass. Defaults to 50.

    Returns:
        A tuple ``(df, scaler, numerical_features)``: the processed frame, the
        fitted scaler, and the list of score column names that were scaled.

    Raises:
        KeyError: If the CSV lacks any column this function relies on.
    """
    numerical_features = ["math score", "reading score", "writing score"]
    dropped_cols = ["lunch", "race/ethnicity", "gender"]
    cat_cols = ["parental level of education", "test preparation course"]

    df = pd.read_csv(file_path)

    # Fail early with one readable message instead of a KeyError from
    # whichever pandas call happens to touch a missing column first.
    required = numerical_features + dropped_cols + cat_cols
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise KeyError(f"{file_path} is missing required column(s): {missing}")

    # Target: pass/fail derived from the average of the three exam scores.
    average_score = df[numerical_features].mean(axis=1)
    df["Passed"] = (average_score >= pass_threshold).astype(int)

    df = df.drop(columns=dropped_cols)
    df = pd.get_dummies(df, columns=cat_cols)

    # The fitted scaler is returned so callers can apply the identical
    # transformation to new inputs at prediction time.
    scaler = StandardScaler()
    df[numerical_features] = scaler.fit_transform(df[numerical_features])

    return df, scaler, numerical_features
|
|
|
|
|
def train_model(df, scaler=None):
    """Fit an XGBoost classifier that predicts the ``Passed`` column.

    The frame is split 80/20 (``random_state=42``) into train and test parts
    and the classifier is fitted on the training part only.

    Args:
        df: Processed frame containing the feature columns and ``Passed``.
        scaler: Scaler object to hand back to the caller unchanged. When
            omitted, the module-level ``scaler`` variable is used, which is
            what this function implicitly relied on before the parameter
            existed.

    Returns:
        A tuple ``(model, X_test, y_test, feature_names, X_train, scaler)``
        where ``feature_names`` is ``X_train.columns``.

    Raises:
        NameError: If ``scaler`` is not passed and no module-level ``scaler``
            exists.
    """
    if scaler is None:
        # Legacy path: resolve the global up front so a missing scaler fails
        # before the expensive fit rather than at the return statement.
        try:
            scaler = globals()["scaler"]
        except KeyError:
            raise NameError(
                "train_model() needs a scaler: pass scaler=... or define a "
                "module-level 'scaler' first"
            ) from None

    X = df.drop(columns=["Passed"])
    y = df["Passed"]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    model = XGBClassifier(
        n_estimators=1000,
        learning_rate=0.03,
        max_depth=10,
        colsample_bytree=0.9,
        subsample=0.9,
        random_state=42,
    )
    model.fit(X_train, y_train)
    return model, X_test, y_test, X_train.columns, X_train, scaler
|
|
|
|
|
def evaluate_model(model, X_test, y_test):
    """Return whatever ``model.score`` reports for the held-out split.

    Args:
        model: Fitted estimator exposing a ``score(X, y)`` method.
        X_test: Feature rows of the test split.
        y_test: Labels of the test split.

    Returns:
        The value returned by ``model.score(X_test, y_test)``.
    """
    return model.score(X_test, y_test)
|
|
|
# Build the dataset, fit the classifier and score it at import/run time.
# NOTE(review): Streamlit re-executes the script on each widget interaction,
# so this presumably reloads the CSV and retrains the model every time -
# consider Streamlit's caching decorators; confirm against the deployed version.
df, scaler, numerical_features = load_data()

training_outputs = train_model(df)
model, X_test, y_test, feature_names, X_train, scaler = training_outputs

accuracy = evaluate_model(model, X_test, y_test)

# One tab per app section; the tab objects are used as context managers below.
tab_labels = ["π Dataset Overview", "π Model Performance", "π Predict Performance"]
tab1, tab2, tab3 = st.tabs(tab_labels)
|
|
|
|
|
with tab1:
    # Descriptive statistics of the processed frame (the score columns were
    # standardised when the data was loaded).
    st.write("### Dataset Summary")
    summary = df.describe()
    st.write(summary)

    # Bar chart: number of rows per value of the "Passed" target.
    st.write("### Distribution of Passed Students")
    passed_tally = df["Passed"].value_counts()
    pass_counts = passed_tally.reset_index()
    pass_counts.columns = ["Passed", "Count"]

    bars = alt.Chart(pass_counts).mark_bar()
    chart = bars.encode(
        x=alt.X("Passed:N", title="Passed (0 = No, 1 = Yes)"),
        y="Count",
        color="Passed:N",
    )
    st.altair_chart(chart, use_container_width=True)
|
|
|
|
|
with tab2:
    # Show the test-set accuracy computed by evaluate_model() as a percentage.
    st.write("### Model Evaluation")
    # The literal used to be split across two physical lines by a mis-decoded
    # emoji, which is a syntax error in a single-quoted f-string. The check
    # mark (U+2705) is written as an escape so it survives re-encoding.
    st.write(f"\u2705 **Model Accuracy:** {accuracy*100:.2f}%")
|
|
|
|
|
with tab3:
    # Collect one student's details, encode them like the training data and
    # show the model's pass/fail prediction on demand.
    st.write("### Enter Student Details")

    math_score = st.number_input("Math Score", min_value=0, max_value=100, value=70)
    reading_score = st.number_input("Reading Score", min_value=0, max_value=100, value=70)
    writing_score = st.number_input("Writing Score", min_value=0, max_value=100, value=70)
    parent_education = st.selectbox(
        "Parental Level of Education",
        [
            "Some high school",
            "High school",
            "Some college",
            "Associate's degree",
            "Bachelor's degree",
            "Master's degree",
        ],
    )
    test_prep = st.selectbox("Test Preparation Course", ["None", "Completed"])

    input_data = pd.DataFrame({
        "math score": [math_score],
        "reading score": [reading_score],
        "writing score": [writing_score],
    })

    # Apply the same scaling that was fitted on the training data.
    input_data[numerical_features] = scaler.transform(input_data[numerical_features])

    # Align with the training columns; every one-hot column starts at 0.
    input_data = input_data.reindex(columns=feature_names, fill_value=0)

    # The one-hot column names come from the raw CSV values, whose casing may
    # differ from the Title-Case labels above. An exact-name assignment would
    # then create a column that is not a training feature, and the selection
    # would be silently ignored - so resolve the column case-insensitively.
    columns_by_lower = {str(col).lower(): col for col in feature_names}
    selections = {
        "parental level of education": parent_education,
        "test preparation course": test_prep,
    }
    for prefix, choice in selections.items():
        wanted = f"{prefix}_{choice}"
        if wanted in columns_by_lower.values():
            column = wanted
        else:
            column = columns_by_lower.get(wanted.lower())

        if column is None:
            st.warning(
                f"No training column matches {prefix!r} = {choice!r}; "
                "this input is ignored by the model."
            )
        else:
            input_data[column] = 1

    if st.button("Predict"):
        prediction = model.predict(input_data)[0]
        result = "Pass" if prediction == 1 else "Fail"
        st.subheader(f"Prediction: {result}")
|
|