Spaces:

chiichann
/

student_performance_prediction_app

Sleeping

File size: 4,830 Bytes

55b17cf
3bbe6c1
 
 
55b17cf
 
3bbe6c1
 
55b17cf
 
3bbe6c1
55b17cf
 
 
 
 
 
 
 
 
 
 
 
 
3bbe6c1
55b17cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3bbe6c1
55b17cf
 
 
 
 
3bbe6c1
55b17cf
 
 
 
 
3bbe6c1
55b17cf
3bbe6c1
55b17cf
 
 
 
3bbe6c1
55b17cf
 
 
3bbe6c1
55b17cf
 
3bbe6c1
55b17cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3bbe6c1
55b17cf
 
 
 
3bbe6c1
55b17cf

import streamlit as st
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler
import altair as alt

# 🎓 Page heading
st.title("📊 Student Performance Prediction App")

# 📚 Introductory markdown shown under the heading.
# Built line by line so the indentation and blank line that are part of the
# rendered string stay explicit.
_ABOUT_MARKDOWN = (
    "\n"
    "    ## About This App\n"
    "    This application predicts whether a student will pass or fail based on their exam scores and demographic data.\n"
    "    ### Features:\n"
    "    - **Dataset Overview**: View the number of students categorized by performance.\n"
    "    - **Model Evaluation**: Check the model's accuracy on the test set.\n"
    "    - **Student Performance Prediction**: Enter student details and get a prediction.\n"
    "    \n"
    "    The app uses **Streamlit** for the UI and **XGBoostClassifier** for predictions.\n"
    "    "
)
st.write(_ABOUT_MARKDOWN)

# 📌 Load and preprocess data
def load_data(file_path="exams.csv", pass_threshold=50):
    """Load the exam CSV and turn it into a model-ready feature table.

    Steps, in order:
      1. Read ``file_path`` with pandas.
      2. Derive the binary target ``Passed``: 1 when the mean of the math,
         reading and writing scores is at least ``pass_threshold``, else 0.
      3. Drop the helper average and the ``lunch``, ``race/ethnicity`` and
         ``gender`` columns.
      4. One-hot encode ``parental level of education`` and
         ``test preparation course`` with ``pd.get_dummies``.
      5. Standardize the three score columns with a ``StandardScaler``.

    Args:
        file_path: Path of the CSV file to read. Defaults to ``"exams.csv"``.
        pass_threshold: Minimum average score counted as a pass. Defaults
            to 50.

    Returns:
        A tuple ``(df, scaler, numerical_features)``: the processed frame
        (including the ``Passed`` column), the fitted scaler, and the list of
        column names the scaler was fitted on.
    """
    numerical_features = ["math score", "reading score", "writing score"]
    cat_cols = ["parental level of education", "test preparation course"]

    df = pd.read_csv(file_path)

    # Target variable, derived from the raw (unscaled) scores.
    df["Average Score"] = df[numerical_features].mean(axis=1)
    df["Passed"] = (df["Average Score"] >= pass_threshold).astype(int)

    # Non-mutating drop: avoids in-place edits on the frame being built.
    df = df.drop(columns=["Average Score", "lunch", "race/ethnicity", "gender"])

    # One-hot encode the remaining categorical columns.
    df = pd.get_dummies(df, columns=cat_cols)

    # NOTE: the scaler is fitted on every row in the file; the same fitted
    # scaler must be used to transform any later prediction input.
    scaler = StandardScaler()
    df[numerical_features] = scaler.fit_transform(df[numerical_features])

    return df, scaler, numerical_features

# Train the model
def train_model(df, scaler=None):
    """Fit an XGBoost classifier that predicts the ``Passed`` column.

    The frame is split 80/20 into train and test parts with a fixed
    ``random_state`` of 42, and the classifier is fitted on the train part.

    Args:
        df: Processed frame containing the feature columns and ``Passed``.
        scaler: Scaler to hand back to the caller. When omitted, the
            module-level ``scaler`` is used if one exists (this is what the
            function implicitly relied on before), otherwise ``None``.

    Returns:
        A tuple ``(model, X_test, y_test, feature_names, X_train, scaler)``
        where ``feature_names`` is ``X_train.columns``.
    """
    if scaler is None:
        # Backward compatibility: the original body returned a global named
        # `scaler` without receiving it. Look it up explicitly so a missing
        # global yields None instead of a NameError after training.
        scaler = globals().get("scaler")

    features = df.drop(columns=["Passed"])
    labels = df["Passed"]

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )

    model = XGBClassifier(
        n_estimators=1000,
        learning_rate=0.03,
        max_depth=10,
        colsample_bytree=0.9,
        subsample=0.9,
        random_state=42,
    )
    model.fit(X_train, y_train)

    return model, X_test, y_test, X_train.columns, X_train, scaler

# Evaluate the model
def evaluate_model(model, X_test, y_test):
    """Return whatever ``model.score`` reports for the given test data.

    Args:
        model: Object exposing a ``score(X, y)`` method.
        X_test: Test features, passed through to ``score`` unchanged.
        y_test: Test labels, passed through to ``score`` unchanged.

    Returns:
        The value returned by ``model.score(X_test, y_test)``.
    """
    return model.score(X_test, y_test)

# Build the data, model and accuracy figure used by the tabs below.
# Order matters: train_model() hands back the `scaler` bound on the next line,
# so load_data() has to run first.
df, scaler, numerical_features = load_data()
# `scaler` is rebound here from train_model()'s sixth return value; X_test and
# y_test feed the accuracy computation, feature_names is the training column
# layout that prediction inputs are aligned to further down.
model, X_test, y_test, feature_names, X_train, scaler = train_model(df)
# Score of the fitted model on the held-out split returned above.
accuracy = evaluate_model(model, X_test, y_test)

# 🏡 Three tabs: overview, model performance, prediction form
tab_labels = ["💁 Dataset Overview", "📊 Model Performance", "🎓 Predict Performance"]
tab1, tab2, tab3 = st.tabs(tab_labels)

# 📁 Tab 1: summary statistics plus a bar chart of the target distribution
with tab1:
    st.write("### Dataset Summary")
    summary = df.describe()
    st.write(summary)

    st.write("### Distribution of Passed Students")
    # Two-column frame: one row per distinct `Passed` value with its row count.
    counts = df["Passed"].value_counts()
    pass_counts = pd.DataFrame({"Passed": counts.index, "Count": counts.values})

    x_axis = alt.X("Passed:N", title="Passed (0 = No, 1 = Yes)")
    bars = alt.Chart(pass_counts).mark_bar()
    chart = bars.encode(x=x_axis, y="Count", color="Passed:N")
    st.altair_chart(chart, use_container_width=True)

# 📊 Tab 2: accuracy of the fitted model on the test split
with tab2:
    st.write("### Model Evaluation")
    accuracy_line = f"✅ **Model Accuracy:** {accuracy*100:.2f}%"
    st.write(accuracy_line)

# 🎓 Tab 3: Predict Performance
def _find_one_hot_column(prefix, label, columns):
    """Return the entry of ``columns`` equal to ``prefix + label`` ignoring case.

    The form labels are capitalised for display, while the one-hot column
    names come from ``pd.get_dummies`` on the raw CSV values. Matching
    case-insensitively keeps the two in sync whatever casing the data uses.

    Args:
        prefix: Column prefix including the trailing underscore.
        label: Label selected in the form.
        columns: Iterable of training column names.

    Returns:
        The matching column name, or ``None`` when nothing matches.
    """
    wanted = f"{prefix}{label}".casefold()
    return next((col for col in columns if str(col).casefold() == wanted), None)


with tab3:
    st.write("### Enter Student Details")

    math_score = st.number_input("Math Score", min_value=0, max_value=100, value=70)
    reading_score = st.number_input("Reading Score", min_value=0, max_value=100, value=70)
    writing_score = st.number_input("Writing Score", min_value=0, max_value=100, value=70)
    parent_education = st.selectbox(
        "Parental Level of Education",
        ["Some high school", "High school", "Some college", "Associate's degree", "Bachelor's degree", "Master's degree"],
    )
    test_prep = st.selectbox("Test Preparation Course", ["None", "Completed"])

    # Single-row frame holding the raw scores.
    input_data = pd.DataFrame({
        "math score": [math_score],
        "reading score": [reading_score],
        "writing score": [writing_score],
    })

    # Apply the same standardization that was fitted on the training data.
    input_data[numerical_features] = scaler.transform(input_data[numerical_features])

    # Align with the training layout: same columns, same order, and every
    # one-hot column present and initialised to 0.
    input_data = input_data.reindex(columns=feature_names, fill_value=0)

    # Switch on the one-hot column for each selection. Previously the column
    # was addressed with an exact-match key built from the display label; when
    # the casing differed from the training column, a stray column was created
    # and then discarded by reindex, so the selection never reached the model.
    selections = (
        ("parental level of education_", parent_education),
        ("test preparation course_", test_prep),
    )
    for prefix, label in selections:
        column = _find_one_hot_column(prefix, label, feature_names)
        if column is None:
            # Surface the mismatch instead of silently ignoring the input.
            st.warning(
                f"No training column matches '{prefix}{label}'; "
                "this selection is not used in the prediction."
            )
        else:
            input_data[column] = 1

    if st.button("Predict"):
        prediction = model.predict(input_data)[0]
        result = "Pass" if prediction == 1 else "Fail"
        st.subheader(f"Prediction: {result}")