import streamlit as st
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler
import altair as alt

# 🎓 App Title
st.title("📊 Student Performance Prediction App")

# 📚 About the App
st.write(
    """
    ## About This App
    This application predicts whether a student will pass or fail based on their exam scores and demographic data.

    ### Features:
    - **Dataset Overview**: View the number of students categorized by performance.
    - **Model Evaluation**: Check the model's accuracy on the test set.
    - **Student Performance Prediction**: Enter student details and get a prediction.

    The app uses **Streamlit** for the UI and **XGBoostClassifier** for predictions.
    """
)


# 📌 Load and preprocess data
def load_data():
    """Read ``exams.csv`` and turn it into a model-ready frame.

    Steps, in order:
      1. Derive the binary target ``Passed`` (1 when the mean of the three
         exam scores is >= 50, else 0).
      2. Drop the helper average column and the unused demographic columns.
      3. One-hot encode the two remaining categorical columns.
      4. Standardize the three score columns in place.

    Returns:
        tuple: ``(df, scaler, numerical_features)`` where ``df`` is the
        processed DataFrame, ``scaler`` is the fitted ``StandardScaler`` and
        ``numerical_features`` is the list of column names it was fitted on.
    """
    file_path = "exams.csv"
    df = pd.read_csv(file_path)

    # Define target variable: Pass if average score >= 50
    df["Average Score"] = df[["math score", "reading score", "writing score"]].mean(axis=1)
    df["Passed"] = (df["Average Score"] >= 50).astype(int)

    # Drop unnecessary columns
    df.drop(columns=["Average Score", "lunch", "race/ethnicity", "gender"], inplace=True)

    # Encode categorical variables
    cat_cols = ["parental level of education", "test preparation course"]
    df = pd.get_dummies(df, columns=cat_cols)

    # Standardize numerical features
    # NOTE(review): the scaler is fitted on the full dataset, i.e. before the
    # train/test split performed in train_model.
    scaler = StandardScaler()
    numerical_features = ["math score", "reading score", "writing score"]
    df[numerical_features] = scaler.fit_transform(df[numerical_features])

    return df, scaler, numerical_features


# Train the model
def train_model(df, scaler=None):
    """Fit an ``XGBClassifier`` that predicts the ``Passed`` column.

    Args:
        df: Processed DataFrame containing the feature columns and ``Passed``.
        scaler: Optional fitted scaler that is passed straight through to the
            return value. It used to be picked up implicitly from a
            module-level global; it is now an explicit argument.

    Returns:
        tuple: ``(model, X_test, y_test, feature_names, X_train, scaler)``.
        ``feature_names`` is the column index of the training features, in
        the order the model was trained on.
    """
    # NOTE(review): the three score columns that define ``Passed`` stay in the
    # feature matrix, so the target is a deterministic function of the inputs.
    X = df.drop(columns=["Passed"])
    y = df["Passed"]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = XGBClassifier(
        n_estimators=1000,
        learning_rate=0.03,
        max_depth=10,
        colsample_bytree=0.9,
        subsample=0.9,
        random_state=42,
    )
    model.fit(X_train, y_train)

    return model, X_test, y_test, X_train.columns, X_train, scaler


# Evaluate the model
def evaluate_model(model, X_test, y_test):
    """Return the model's accuracy (``model.score``) on the held-out split."""
    accuracy = model.score(X_test, y_test)
    return accuracy


def _find_dummy_column(feature_names, prefix, choice):
    """Return the trained one-hot column matching ``prefix`` + ``choice``.

    The comparison is case-insensitive because the labels shown in the UI are
    capitalized, while ``pd.get_dummies`` names columns with whatever spelling
    the CSV uses (presumably lowercase - verify against exams.csv).

    Returns:
        The matching column name exactly as it appears in ``feature_names``,
        or ``None`` when no trained column corresponds to the choice.
    """
    wanted = f"{prefix}_{choice}".casefold()
    for column in feature_names:
        if str(column).casefold() == wanted:
            return column
    return None


# NOTE(review): Streamlit re-executes this script on every widget interaction,
# so the data is reloaded and the model retrained each time. Consider wrapping
# load_data/train_model with Streamlit's caching decorators if the installed
# Streamlit version provides them.
df, scaler, numerical_features = load_data()
model, X_test, y_test, feature_names, X_train, scaler = train_model(df, scaler)
accuracy = evaluate_model(model, X_test, y_test)

# 🏡 Streamlit Tabs
tab1, tab2, tab3 = st.tabs(["💁 Dataset Overview", "📊 Model Performance", "🎓 Predict Performance"])

# 📁 Tab 1: Dataset Overview
with tab1:
    st.write("### Dataset Summary")
    st.write(df.describe())

    st.write("### Distribution of Passed Students")
    pass_counts = df["Passed"].value_counts().reset_index()
    pass_counts.columns = ["Passed", "Count"]

    chart = alt.Chart(pass_counts).mark_bar().encode(
        x=alt.X("Passed:N", title="Passed (0 = No, 1 = Yes)"),
        y="Count",
        color="Passed:N",
    )
    st.altair_chart(chart, use_container_width=True)

# 📊 Tab 2: Model Performance
with tab2:
    st.write("### Model Evaluation")
    st.write(f"✅ **Model Accuracy:** {accuracy*100:.2f}%")

# 🎓 Tab 3: Predict Performance
with tab3:
    st.write("### Enter Student Details")

    math_score = st.number_input("Math Score", min_value=0, max_value=100, value=70)
    reading_score = st.number_input("Reading Score", min_value=0, max_value=100, value=70)
    writing_score = st.number_input("Writing Score", min_value=0, max_value=100, value=70)

    parent_education = st.selectbox(
        "Parental Level of Education",
        [
            "Some high school",
            "High school",
            "Some college",
            "Associate's degree",
            "Bachelor's degree",
            "Master's degree",
        ],
    )
    test_prep = st.selectbox("Test Preparation Course", ["None", "Completed"])

    # Convert inputs to match model encoding
    input_data = pd.DataFrame(
        {
            "math score": [math_score],
            "reading score": [reading_score],
            "writing score": [writing_score],
        }
    )

    # Standardize numerical inputs with the scaler fitted on the dataset
    input_data[numerical_features] = scaler.transform(input_data[numerical_features])

    # Lay the row out exactly like the training matrix; every one-hot column
    # starts at 0 and only the selected categories are switched on below.
    input_data = input_data.reindex(columns=feature_names, fill_value=0)

    selections = (
        ("parental level of education", parent_education),
        ("test preparation course", test_prep),
    )
    for prefix, choice in selections:
        column = _find_dummy_column(feature_names, prefix, choice)
        if column is None:
            # Surface the mismatch instead of silently predicting with the
            # category left unset.
            st.warning(f"'{choice}' was not seen in the training data for '{prefix}'; it is ignored.")
        else:
            input_data[column] = 1

    if st.button("Predict"):
        prediction = model.predict(input_data)[0]
        result = "Pass" if prediction == 1 else "Fail"
        st.subheader(f"Prediction: {result}")